# data processing 
library(readxl)
library(tidyverse)
library(plyr)          # rbind
library(dplyr)
library(readr)
library(purrr)
library(skimr)

# table & formatting
library(kableExtra) 
library(knitr)
library(data.table)
library(modelsummary)
library(sjPlot)
library(janitor)

# visualization
library(ggplot2)
library(gridExtra)
library(ggpubr)
library(plotly)
library(ggfortify)

# data & regression
library(here)
library(psych)        # corPlot
library(lares)
library(plm)          # panel data
library(lmtest)
library(car)

PREFACE

This is an individual analysis conducted by Nguyen Thuy Duong, a final-year student undertaking an Honors Bachelor degree in Economics and Business Economics and a Dedicated minor in Applied Data Science at Utrecht University. The research question is The Impact of Green Innovations on Firm Value in the Biotech sector in Europe: A Path to Efficient and Sustainable Growth. The research is completed under the supervision of Prof. dr. Wouter Botzen and PhD. Brian Colgan.

The analysis is organized as follows. The first section is data processing, including merging data sets and checking data quality. The data is explained and statistically described in Sections 2 and 3. Section 4 applies the Pooled OLS Fixed Effects Regressions to identify the relationship between the dependent variable, \(Firm \ value\), and the explanatory variable, \(Green \ innovation \ (GI)\). In this section, robustness checks are performed. While each section includes some explanations and sub-conclusions, key interpretations are provided in Section 6. Some of these conclusions are highlighted and linked to current literature and theories in the Word file.

Keywords: Green innovation (GI), biotechnology, sustainable development, patent data, Fixed Effects Regression.


Section I: Data Preparation

Patent Data

# Set path
path = "C:/Users/HP/Documents/Utrecht University/UU 2021 - 2022/Thesis/Patent Application"

# Compile all files
patent <- list.files(path = path,
                     pattern = "*.csv", 
                     full.names = TRUE) %>%
  lapply(read_csv) %>%
  bind_rows()
# Rename columns
names(patent)[1:3] <- c("Count", "Year", "Type")
# Create 'Age' & 'Location' variables & subsets 
   # patent counts
patent_counts <- patent %>% 
  filter(Type == "Granted Patent") %>%
  mutate(Age = Year - Establishment) %>%
  separate(Firm_id, 
           sep = "-", 
           c( 'Firm', 'Location'), 
           remove = F) %>% 
  select(-c(Type, Establishment))

   # patent applications
patent_app <- patent %>% 
  filter(Type == "Patent Application") %>%
  mutate(Age = Year - Establishment) %>% 
  select(-c(Type, Establishment))

Financial Data

Combine all sheets in Excel workbook

# Set path 
path <- "C:/Users/HP/Documents/Utrecht University/UU 2021 - 2022/Thesis/EU Firm data"
  
# Set the working directory 
setwd(path)
  
# Accessing all the sheets 
sheet = excel_sheets("Financial_dta.xlsx")
  
# Applying sheet names to dataframe names
data_list = lapply(setNames(sheet, sheet), 
                    function(x) read_excel("Financial_dta.xlsx", sheet=x))

# Compile all dataframes
fin_data = bind_rows(data_list, .id="Sheet")

Pivot data

# Rename columns
names(fin_data)[1:2] <- c("Firm_id", "Feature")

# Rrop unnecessary columns that contains sheet names
fin_data <- fin_data[, 1:16]

# Replace row values
fin_data$Feature <- rep(c("Sales", "Sales_growth", "Gross_Income", "GIncome_growth", "Gross_margin", 
                              "EBIT", "EBIT_growth", "EBIT_margin", "EBITDA", "EBITDA_growth",
                              "EBITDA_margin", "Net_income", "NIncome_growth", "Net_margin", "Balance_sheet",
                              "CSI", "CSI_growth", "CSI/TAssets", "Total_assets", "TA_growth", 
                              "Asset_turnover", "ROA", "Total_debt", "Tdebt_growth", "Leverage", 
                              "Tdebt/Tequity", "Net_debt", "Net_debt_growth", "Ndebt/Tequity", "Total_liab", 
                              "Tliab_growth", "Tshare_equity", "TSE_growth", "Firm_value", "ROE"), 52)


# Convert to panel data
   # 'Year' columns into a single long column 
finance <- pivot_longer(data = fin_data,
                        cols = '2006':'2019',
                        names_to = "Year",
                        values_to = "Amount",
                        values_drop_na = FALSE,
                        names_repair = "minimal")  # accept duplicated names
   # pivot wide
finance <- pivot_wider(data = finance,
                       names_from = Feature, 
                       values_from = Amount)

Relevant variables

# Keep relevant variables
finance <- finance %>%
   select(c("Firm_id", "Year","Firm_value", "Sales", "Sales_growth", 
            "Total_assets", "Total_debt", "Leverage", 
            "Tshare_equity", "ROA", "ROE"))

Merging Data

# Vertically merge
merged <- merge(patent_counts, finance, by = c("Firm_id", "Year"))

Missing values

  • Missing values exist, but at a small proportion (5.6%), meaning that removing them might not substantially affect the analysis results.
# Detecting NA using summary table
summary(merged)
##    Firm_id               Year          Count            Firm          
##  Length:720         Min.   :2006   Min.   :   0.0   Length:720        
##  Class :character   1st Qu.:2009   1st Qu.:  10.0   Class :character  
##  Mode  :character   Median :2012   Median :  35.0   Mode  :character  
##                     Mean   :2012   Mean   : 495.8                     
##                     3rd Qu.:2016   3rd Qu.: 235.5                     
##                     Max.   :2019   Max.   :5616.0                     
##                                                                       
##    Location              Age          Firm_value          Sales         
##  Length:720         Min.   :  0.0   Min.   :-535.06   Min.   :     0.0  
##  Class :character   1st Qu.: 12.0   1st Qu.:  49.30   1st Qu.:   126.2  
##  Mode  :character   Median : 16.0   Median :  69.59   Median :  2148.5  
##                     Mean   : 23.9   Mean   :  61.11   Mean   : 10592.0  
##                     3rd Qu.: 22.0   3rd Qu.:  83.87   3rd Qu.: 12469.0  
##                     Max.   :123.0   Max.   :  99.15   Max.   :194033.2  
##                                     NA's   :3         NA's   :3         
##   Sales_growth       Total_assets        Total_debt           Leverage      
##  Min.   : -100.00   Min.   :    14.9   Min.   :     0.00   Min.   :  0.000  
##  1st Qu.:  -17.61   1st Qu.:  2053.6   1st Qu.:     0.00   1st Qu.:  0.000  
##  Median :    7.90   Median : 25882.5   Median :    85.11   Median :  2.259  
##  Mean   :  338.81   Mean   : 45287.5   Mean   :  5680.74   Mean   : 13.931  
##  3rd Qu.:   42.73   3rd Qu.: 59011.1   3rd Qu.:  4291.75   3rd Qu.: 15.963  
##  Max.   :94150.00   Max.   :625686.0   Max.   :183618.00   Max.   :564.704  
##  NA's   :52         NA's   :3          NA's   :4           NA's   :4        
##  Tshare_equity           ROA                ROE           
##  Min.   :-27210.9   Min.   :-262.429   Min.   :-2833.752  
##  1st Qu.:   787.3   1st Qu.: -37.637   1st Qu.:  -53.558  
##  Median : 12857.3   Median : -17.309   Median :  -24.037  
##  Mean   : 29239.3   Mean   : -21.850   Mean   :  -38.915  
##  3rd Qu.: 36386.8   3rd Qu.:   2.143   3rd Qu.:    4.239  
##  Max.   :470117.0   Max.   :  71.200   Max.   :  290.151  
##  NA's   :3          NA's   :5          NA's   :25
# Compute proportion of missing values (~ 5.6%)
sum(is.na(merged))/ (nrow(merged) * ncol(merged)) * 100
## [1] 0.9444444

Solutions

# Drop NA
data <- merged[complete.cases(merged), ]

# Variable classification & create new variables
   # checking
data.frame(lapply(data, class))
##     Firm_id    Year   Count      Firm  Location     Age Firm_value   Sales
## 1 character numeric numeric character character numeric    numeric numeric
##   Sales_growth Total_assets Total_debt Leverage Tshare_equity     ROA     ROE
## 1      numeric      numeric    numeric  numeric       numeric numeric numeric
   # 'Year' to numeric & new variables
data <- data %>% mutate(Year = as.numeric(Year),
                        Green_inn = log(1 + Count) %>% round(4),
                        Size = log(Total_assets))%>% 
   
   # select & reorder relevant variables
                 select(Firm_id, Firm, Year, Firm_value, Count, Green_inn, everything())

Section II: Data Description

Table of description

# Create a table
table1 <- data.table(
  '<b>Statistics' = c("Firm_value", "Count" , "Green_inn", "Age", "Sales", "Sale_growth",
                      "Total_asset", "Total_debt", "Leverage", "Tshare_equity",
                      "ROA", "ROE", "Size", "Location", "Year"),
  
  '<b>Variables' = c("Firm value", "Patent count", "Green innovation", "Firm age",
                     "Annual sales", "Annual sales growth", "Total assets", "Total debt",
                     "Leverage ratio", "Total shareholders' equity", "Return on Asset", 
                     "Return on Equity", "Firm size", "Country", "Year"),
  
  '<b>Description' = c("Tobin’s q",
                  "Number of patents granted",
                  "Indicated by % change in number of patents",
                  "The log value of operating years since the firm’s establishment",
                  "The current operating income",
                  "The rate of increase in annual sales",
                  "Total amount of assets owned by the company",
                  "Total amount of liabilities owned by the company",
                  "Indicator of financial capability of the firms",
                  "The shareholders' claim on assets after all debts owed are paid",
                  "Indicator of profitability of the firm in relation to assets",
                  "Indicator of profitability of the firm in relation to equity",
                  "The log value of total asset",
                  "Country of establishment: Sweden, Switzerland, England, France, 
                   Denmark, Germany, Autria, Netherlands, Finland, Norway, Italy",
                  "Research periods"),
  
  '<b>Mathematics' = c("Equity Market Value/ Equity Book Value",
                  "",
                  "Ln (1 + patent counts)",
                  "Log (Research period – Establishment year)",
                  "",
                  "Current operating income - Previous year’s operating income)/(Previous year’s operating income",
                  "",
                  "",
                  "Total Debt/Total Assets",
                  "Total Assets - Total Debt",
                  "Net profit/Total assets",
                  "Net income/ Total shareholders' equity",
                  "Log (Total Asset)",
                  "SE, CH, GB, FR, DK, DE, AU, NL, FI, NO, IT",
                  "2006, 2007, ..., 2019")
  )

# Format Description table 
table1 <- kable(
  x = table1, 
  format = "html", 
  size = 10,
  escape = FALSE,
  align = "llll",
  caption = "<b>TABLE 1: <i>Data Description") %>%
  
  # styling
  kable_classic(full_width = F, html_font = "calibri", position = "center") %>%
  
  # footnotes for table 
  footnote(general = "All values, except for sales growth, are reported in thousand euros. Sales growth is reported in percentage.",
  footnote_as_chunk = TRUE)

  # save
  save_kable(x = table1, file = "Table 1.png", zoom = 1.5)

# Print table
table1
TABLE 1: Data Description
Statistics Variables Description Mathematics
Firm_value Firm value Tobin’s q Equity Market Value/ Equity Book Value
Count Patent count Number of patents granted
Green_inn Green innovation Indicated by % change in number of patents Ln (1 + patent counts)
Age Firm age The log value of operating years since the firm’s establishment Log (Research period – Establishment year)
Sales Annual sales The current operating income
Sale_growth Annual sales growth The rate of increase in annual sales Current operating income - Previous year’s operating income)/(Previous year’s operating income
Total_asset Total assets Total amount of assets owned by the company
Total_debt Total debt Total amount of liabilities owned by the company
Leverage Leverage ratio Indicator of financial capability of the firms Total Debt/Total Assets
Tshare_equity Total shareholders’ equity The shareholders’ claim on assets after all debts owed are paid Total Assets - Total Debt
ROA Return on Asset Indicator of profitability of the firm in relation to assets Net profit/Total assets
ROE Return on Equity Indicator of profitability of the firm in relation to equity Net income/ Total shareholders’ equity
Size Firm size The log value of total asset Log (Total Asset)
Location Country Country of establishment: Sweden, Switzerland, England, France, Denmark, Germany, Autria, Netherlands, Finland, Norway, Italy SE, CH, GB, FR, DK, DE, AU, NL, FI, NO, IT
Year Year Research periods 2006, 2007, …, 2019
Note: All values, except for sales growth, are reported in thousand euros. Sales growth is reported in percentage.

Section III: Data Descriptives

This section statistically describes the data. The first sub-section provides a univariate analysis, including the statistical features and visualization of the distribution or frequency of individual variables. The second elaborates on the relationships between these variables.

The summary table shows that there are some abnormal values in ROA and ROE, which will be shown in the distribution graphs. These values could be removed to ensure the quality of the data and analysis.

# Statistics summary 
summary(data)
##    Firm_id              Firm                Year        Firm_value    
##  Length:648         Length:648         Min.   :2006   Min.   :-23.16  
##  Class :character   Class :character   1st Qu.:2009   1st Qu.: 51.69  
##  Mode  :character   Mode  :character   Median :2012   Median : 68.89  
##                                        Mean   :2013   Mean   : 64.77  
##                                        3rd Qu.:2016   3rd Qu.: 83.48  
##                                        Max.   :2019   Max.   : 99.15  
##      Count          Green_inn       Location              Age        
##  Min.   :   0.0   Min.   :0.000   Length:648         Min.   :  0.00  
##  1st Qu.:  13.0   1st Qu.:2.639   Class :character   1st Qu.: 12.00  
##  Median :  40.0   Median :3.714   Mode  :character   Median : 17.00  
##  Mean   : 529.6   Mean   :4.166                      Mean   : 24.99  
##  3rd Qu.: 389.0   3rd Qu.:5.966                      3rd Qu.: 22.00  
##  Max.   :5616.0   Max.   :8.634                      Max.   :123.00  
##      Sales           Sales_growth       Total_assets        Total_debt       
##  Min.   :     0.0   Min.   : -100.00   Min.   :    14.9   Min.   :     0.00  
##  1st Qu.:   202.7   1st Qu.:  -18.54   1st Qu.:  1640.5   1st Qu.:     0.00  
##  Median :  2477.1   Median :    7.79   Median : 29162.8   Median :    86.53  
##  Mean   : 11357.4   Mean   :  347.59   Mean   : 48088.3   Mean   :  5892.65  
##  3rd Qu.: 14880.0   3rd Qu.:   43.05   3rd Qu.: 60960.2   3rd Qu.:  4338.32  
##  Max.   :194033.2   Max.   :94150.00   Max.   :625686.0   Max.   :183618.00  
##     Leverage      Tshare_equity           ROA                ROE           
##  Min.   : 0.000   Min.   : -1295.0   Min.   :-262.429   Min.   :-2833.752  
##  1st Qu.: 0.000   1st Qu.:   906.6   1st Qu.: -33.258   1st Qu.:  -49.695  
##  Median : 2.744   Median : 14779.8   Median : -14.535   Median :  -21.118  
##  Mean   :10.627   Mean   : 31155.7   Mean   : -18.027   Mean   :  -36.182  
##  3rd Qu.:15.885   3rd Qu.: 37667.9   3rd Qu.:   3.453   3rd Qu.:    6.183  
##  Max.   :93.895   Max.   :470117.0   Max.   :  71.200   Max.   :  290.151  
##       Size       
##  Min.   : 2.703  
##  1st Qu.: 7.403  
##  Median :10.281  
##  Mean   : 9.319  
##  3rd Qu.:11.018  
##  Max.   :13.347

Univariate analysis

By visualizing the distribution of individual variables, differences between firms’ characteristics (firm heterogeneity) and abnormal values are detected. Solutions to these outliers, if there are any, are suggested.

# Setting global theme
theme_set(theme_classic())

Variable distribution

ROA & ROE

  • There exists extremely high, but few, data points of ROE. These are of Biorfrontera (2006) and Heidelpharma (2010). Since they are few compared to the total number of observations, they can be dropped out.
grid.arrange(
# ROA Distribution
ggplot(data = data, aes(x = ROA)) + 
   geom_density(col="blue") + 
   geom_histogram(aes(y = ..density..), 
                  colour = "black", fill = NA) +
   labs(y = ""),

# ROE Distribution
ggplot(data = data, aes(x = ROE)) + 
   geom_density(col="blue") + 
   geom_histogram(aes(y = ..density..), 
                  colour = "black", fill = NA) +
   labs(y = ""),

# Label figure
nrow = 1, 
top = text_grob("Density distribution",
                color = "red", face = "bold", size = 12))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Detect outliers
as.data.frame(data %>% filter(ROE < -1000 | ROA < -150))
##   Firm_id Firm Year Firm_value Count Green_inn Location Age Sales Sales_growth
## 1  B8F-DE  B8F 2006  -5.037507    10    2.3979       DE   9   148    -62.05128
## 2 HPHA-DE HPHA 2010 -23.162225    56    4.0431       DE  13     0   -100.00000
##   Total_assets Total_debt  Leverage Tshare_equity        ROA       ROE     Size
## 1        16397      15396 93.895225          -826  -44.24139 -2833.752 9.704854
## 2         5591        140  2.504024         -1295 -262.42899 -2639.886 8.628913

Data adjustment

  • After removing substantially different data points, both ROA and ROE are more equivalently distributed, as demonstrated in the below graph.
# Remove abnormal values
data <- data %>% filter(ROE > -1000, ROA > -150)
grid.arrange(
   # ROA Distribution
ggplot(data = data, aes(x = ROA)) + 
   geom_density(col="blue") + 
   geom_histogram(aes(y = ..density..), 
                  colour = "black", fill = NA) +
   labs(y = ""),

   # ROE Distribution
ggplot(data = data, aes(x = ROE)) + 
   geom_density(col="blue") + 
   geom_histogram(aes(y = ..density..), 
                  colour = "black", fill = NA) +
   labs(y = ""),

# Label figure
nrow = 1, 
top = text_grob("Density distribution",
                color = "red", face = "bold", size = 12))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Distribution by firm & country
   # ROA 
p <- ggplot(data = data) + 
   theme(legend.position = "none") + 
         theme(plot.subtitle = element_text(hjust = 0.5, size = 9, 
                                      face = "italic", colour = 'red'))

p1 <- p + geom_boxplot(aes(y = ROA, color = Location))  +
   labs(subtitle = "By country", y = "")

p2 <- p + geom_boxplot(aes(y = ROA, color = Firm)) +
   labs(subtitle = "By firm", y = "")

pa <- ggarrange(p1, p2)

annotate_figure(pa,  
                top = text_grob("Return on Assets",
                color = "red", face = "bold", size = 12))

   # ROE 
p1 <- p + geom_boxplot(aes(y = ROE, color = Location))  +
   labs(subtitle = "By country", y = "")

p2 <- p + geom_boxplot(aes(y = ROE, color = Firm)) +
   labs(subtitle = "By firm", y = "")

pe <- ggarrange(p1, p2)

annotate_figure(pe,  
                top = text_grob("Return on Equity",
                color = "red", face = "bold", size = 12))


Location

# Distribution by country
ggplot(data = data,
       aes(x = Location)) +
   geom_bar(aes(fill = Location)) +
   labs(title = "Number of firms per country",
        x = "", y = "") +
   ylim(0, 150) +
   geom_text(stat = 'count', aes(label = ..count..), vjust = -1) +
   scale_fill_brewer(palette = "Set3") +
   theme(legend.position = "none") + 
   theme(plot.title = element_text(size = 12, hjust = .5, 
                                   face = "bold", color = "red"))


Age

  • Firms can be segregated into two groups according to their age, which is calculated as the gap between the research year and the year of establishment (Table 1).

    • Younger group is aged between \(0 - 40\).

    • Older group is aged between \(80 - 130\).

  • Most biotech companies in France and Italy have a long history of establishment, whereas those located in other nations are recently founded.

  • Denmark is the only country where companies’ ages vary significantly, ranging from 5 to 104 years old.

  • Roche (ROG-CH) is the only old firm exists in Switzerland, which explains the outliers in the 2nd boxplot.

# Distribution
ggplot(data = data, aes(Age)) +
   geom_density(col = "blue") + 
   geom_histogram(aes(y = ..density..),
                  color = 'black', fill = NA) +
   labs(title = "Firm Age Distribution",
        x = "", y = "") +
   theme(legend.position = "none") + 
   theme(plot.title = element_text(size = 12, hjust = .5, 
                                   face = "bold", color = "red"))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Distribution by country
ggplot(data = data, aes(x = Location, y = Age, fill = Location)) +
   geom_boxplot() +
   labs(title = "Firm age by country",
        x = "", y = "") +
   scale_fill_brewer(palette = "Set3") +
   theme(legend.position = "none") + 
   theme(plot.title = element_text(size = 12, hjust = .5, 
                                   face = "bold", color = "red"))

Outliers exist for CH (Switzerland). It is important to check why this happens.

data %>% 
  filter(Location == "CH", Age > 100) %>% 
  select(Firm_id, Age)
##    Firm_id Age
## 1   ROG-CH 110
## 2   ROG-CH 111
## 3   ROG-CH 112
## 4   ROG-CH 113
## 5   ROG-CH 114
## 6   ROG-CH 115
## 7   ROG-CH 116
## 8   ROG-CH 117
## 9   ROG-CH 118
## 10  ROG-CH 119
## 11  ROG-CH 120
## 12  ROG-CH 121
## 13  ROG-CH 122
## 14  ROG-CH 123

Firm Size & Total assets

  • The size of a firm is defined by the Logarithm of the Total Assets to address firm heterogeneity that some companies are much highly valued than the others.
p1 <- ggplot(data = data, aes(Total_assets)) + 
   geom_density(col = "blue") +
   geom_histogram(aes(y = ..density..),
                  color = 'black', fill = NA) +
      labs(subtitle = "Total assets",
        x= "", y = "") +
   theme(plot.subtitle = element_text(size = 9, hjust = 0.5, 
                                      face = "italic", color = "red"))

p2 <- ggplot(data = data, aes(Size)) + 
   geom_density(col = "blue") +
   geom_histogram(aes(y = ..density..),
                  binwidth = 0.7, color = "black", fill = NA) +
   labs(subtitle = "Log(Total Assets)", 
        x= "", y = "") +
   theme(plot.subtitle = element_text(size = 9, hjust = 0.5, 
                                      face = "italic", color = "red"))

p <- ggarrange(p1, p2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
annotate_figure(p,  
                top = text_grob("Firm size",
                color = "red", face = "bold", size = 12))


Sales

  • Some companies had high sales at certain years. For example, Medivir (MVIR B-SE) experienced approximately \(296\)% increase in sales between 2013 and 2014, reaching \(€194033.2\) (in thousands).
# Sales distribution
grid.arrange(

ggplot(data = data, 
       aes(y = Sales)) + 
   geom_boxplot() + 
   labs(subtitle = "Boxplot", 
        x= "", y = "") +
   theme(plot.subtitle = element_text(size = 9, hjust = 0.5, 
                                      face = "italic", colour = 'red')),
  
ggplot(data = data, 
       aes(Sales)) +
   geom_density(col = "blue") +
   geom_histogram(aes(y = ..density..), col = "black", fill = NA) + 
   labs(subtitle = "Histogram", 
        x= "", y = "") +
   theme(plot.subtitle = element_text(size = 9, hjust = 0.5, 
                                      face = "italic", colour = 'red')),

nrow = 1,

# Label figure
top = text_grob("Firm sales (2016 - 2019)",
                color = "red", face = "bold", size = 12))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Detect firm(s) with unusual high sales
data %>% filter(Sales > 100000)
##     Firm_id   Firm Year Firm_value Count Green_inn Location Age    Sales
## 1 MVIR B-SE MVIR B 2014   90.78314    91    4.5218       SE  26 194033.2
##   Sales_growth Total_assets Total_debt Leverage Tshare_equity      ROA      ROE
## 1     296.0562     230548.8   4222.717 1.831594      209299.4 71.19955 79.90538
##       Size
## 1 12.34822

Sales growth

  • As several firms had substantially high sales volumes at some years, their corresponding rates of increase in sales were also high, i.e., NICOX Sales growth in 2017.
# Rate of increase in sales 
ggplot(data = data, aes(y = log(Sales_growth))) + 
   geom_boxplot() + 
   labs(title = "Log of sales growth (2006 - 2019)", 
        x= "", y = "") +
   theme(plot.title = element_text(hjust = 0.5, size = 12, 
                                   face = "bold", colour = 'red'))


Leverage

  • Leverage ratios, indicated as profitability of the business, expose that research firms develop at different rates and make unequal profits.
grid.arrange(
   # histogram
   ggplot(data = data, aes(Leverage)) +
      geom_density(col = "blue") +
      geom_histogram(aes(y = ..density..), 
                     color = "black", fill = "white") +
      labs(subtitle = "Density plot", 
           x= "", y = "") +
      theme(plot.subtitle = element_text(hjust = 0.5, size = 9, 
                                         face = "italic", colour = 'red')) +
      theme(legend.position = "none"),

   # boxplot
   ggplot(data = data, aes(y = Leverage)) +
      geom_boxplot(color = "black", fill = "white") +
      labs(subtitle = "Boxplot", x= "", y = "") +
      theme(plot.subtitle = element_text(hjust = 0.5, size = 9, 
                                      face = "italic", colour = 'red')) +
      theme(legend.position = "none"),

   # label figure
nrow = 1,
top = text_grob("Leverage ratio", color = "red", 
                face = "bold", size = 12))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.


Firm_value

  • Businesses experienced negative value in certain periods.
ggplot(data = data, aes(y = Firm_value)) + 
   labs(title = "Distribution of firm value", 
        x = "", y = "") +
   geom_boxplot(color = 'black', fill = 'white') + 
   theme(plot.title = element_text(hjust = 0.5, size = 12,
                                   face = "bold", colour = 'red')) 


Patent

  • Patent counts vary significantly among firms, ranging from 0 to above 5,600 patents per year. This implies a high possibility of bias when using patent counts as an indicator of GI.

  • To account for these differences, I employ the log change in patent counts, \(log(1 + Count)\). I use parameter \(1 + Count\) instead of \(Count\) to avoid infinite \(log(0)\).

  • Log transformation, as displayed in the second panel of Patent distribution by country figure, can help avoid data skewness bias.

# Distribution
  # patent count
p1 <- ggplot(data = data, aes(y = Count)) + 
   geom_boxplot(color = "black", fill = "white") +
   ylim(0, 6000) +
   labs(subtitle = "Count", y = "") +
   theme(plot.subtitle = element_text(size = 9, hjust = 0.5, 
                                      face = "italic", color = "red"))

  # log of patent count
p2 <- ggplot(data = data, aes(y = Green_inn)) + 
   geom_boxplot(color = "black", fill = "white") +
   labs(subtitle = "Log Count", y = "") +
   theme(plot.subtitle = element_text(size = 9, hjust = 0.5, 
                                      face = "italic", color = "red"))

  # display plots
p <- ggarrange(p1, p2)
annotate_figure(p,  
                top = text_grob("Patent distribution",
                color = "red", face = "bold", size = 12))

# Distribution by country
  # Patent count
p1 <- ggplot(data = data, aes(y = Count)) + 
   geom_boxplot(color = "black", aes(fill = Location)) +
   ylim(0, 6000) + 
   labs(subtitle = "Count",
              y = "") + 
   theme(legend.position = "none")

  # Log of patent count
p2 <- ggplot(data = data, aes(y = Green_inn)) + 
   geom_boxplot(color = "black", aes(fill = Location)) +
   labs(subtitle = "Log Count",
              y = "") +
   guides(fill=guide_legend(title=""))

  # display plots
p <- ggarrange(p1, p2)
annotate_figure(p,  
                top = text_grob("Patent distribution by country",
                color = "red", face = "bold", size = 12))

# Distribution by firm
  # patent count
plot_ly(data, y = ~Count, color = ~Firm_id, 
        type = 'box', colors = "Set1",  
        showlegend = FALSE) %>%
   layout(title = list(text = paste("<b>Patent count</b>",
                                    '<br>',
                                    '<sup>',
                                    "By firm") ,
                       font = list(size = 14, color = 'red')) ,
          xaxis = list(title = "", showticklabels = FALSE),
          yaxis = list (title = ""))
  # log of patent count
plot_ly(data, y = ~Green_inn, color = ~Firm_id,
        type = 'box',  colors = "Set1", 
        showlegend = FALSE) %>%
   layout(title = list(text = paste("Green innovation",
                                    '<br>',
                                    '<sup>',
                                    "(Log change in Patent count)"), 
                       font = list(size = 14, color = 'red')),
          xaxis = list(title = "", showticklabels = FALSE),
          yaxis = list (title = ""))

Time frequency

This subsection investigates how key variables change over time.

Sales

  • Between 2006 and 2019, some firms experienced dramatic changes in sales volume whereas others expanded in a more stable trend.
ggplot(data = data, 
       aes(x = Year, y = Sales, group = 1)) + 
   geom_line() + 
   labs(title = "Firm Sales (2006 - 2019)", 
        x= "", y = "") +
   theme(plot.title = element_text( size = 12, hjust = 0.5, 
                                    face = "bold", colour = 'red')) +
   facet_wrap(~ Firm_id)


Sales growth

  • Very great sales performance are observed in Cosmo Pharma (COX, France), and ImmuPharma (IMM, UK).

  • When leaving out these peculiar values, the sales growth trend becomes more interpretable. Consistent with previous observation that particular businesses achieved extensively high sales volumes, other companies grew at lower paces.

# Detect abnormal values
plot_ly(data, x = ~Year, y = ~Sales_growth, color = ~Firm_id, 
        type = 'scatter', mode = 'line', linetypes = "dashed",
        text = ~ paste('Country: ', Location), 
        colors = "Set2",  
        showlegend = FALSE) %>%
   layout(title = list(text = paste("<b>Sales growth</b>",
                                    '<br>',
                                    '<sup>',
                                    "By firm") ,
                       font = list(size = 14, face = "bold", color = 'red')) ,
          xaxis = list(title = ""),
          yaxis = list(title = ""))
# After dropping substantially high sales volume growth
  # distribution by firm
plot_ly(data %>% filter(Sales_growth < 10000), 
        x = ~Year, y = ~Sales_growth, color = ~Firm_id, 
        type = 'scatter', mode = 'line', linetypes = "dashed",
        text = ~ paste('Country: ', Location), 
        colors = "Set2",  
        showlegend = FALSE) %>%
   layout(title = list(text = paste("<b>Sales growth</b>",
                                    '<br>',
                                    '<sup>',
                                    "By firm (removing outliers)") ,
                       font = list(size = 14, color = 'red')) ,
          xaxis = list(title = ""),
          yaxis = list (title = "", range(c(-1000, 6000))))

Firm sizes & Total assets

  • There is an upward trend in the firm size, meaning that firms expanded their total assets over time. It is also observable that some grew faster than others.
plot_ly(data, x = ~Year, y = ~Size, split = ~Firm,
        type = 'scatter', mode = 'line', 
        alpha = 0.8, linetypes = "dashed",
        text = ~ paste('Country: ', Location),
        showlegend = FALSE
        ) %>%
   layout(title = list(text = "<b>Changes in firm size</b>" ,
                       font = list(size = 14, color = 'red')),
          xaxis = list(title = ""),
          yaxis = list (title = ""))

Leverage

  • Leverage ratios fluctuate over time and do not follow the same pattern.

  • Firms have different leverage ratios, implying that they grow and make profits at different rates. This follows that some businesses are more profitable than others.

plot_ly(data, x = ~Year, y = ~Leverage, color = ~Firm_id,
        type = 'scatter', mode = 'line', linetypes = "dashed",
        text = ~ paste('Country: ', Location),
        colors = "Set1", showlegend = FALSE) %>%
   layout(title = list(text = paste("<b>Leverage ratio</b>",
                                    '<br>',
                                    '<sup>',
                                    "(2006 - 2019 by firm)"), 
                       font = list(size = 14, face = "bold", color = 'red')),
          xaxis = list(title = ""),
          yaxis = list(title = ""))

Firm value

  • Biotechnology companies in the data set are significantly different in terms of economic values. Some incurred substantial losses, causing to negative firm value, whereas others successfully expanded.
ggplot(data = data, 
       aes(x = Year, y = Firm_value)) + 
   geom_line() + 
   facet_wrap(~ Firm_id) +
   labs(title = "Firm Value over time (2006 - 2019)", 
        x = "", y = "") +
   theme(plot.title = element_text(hjust = 0.5, size = 12, 
                                   face = "bold", colour = 'red'))


Green innovation

  • Overall, firms are increasing their investments in green technology, as indicated by the upward trend in the percentage change of patent counts.
plot_ly(data, x = ~Year, y = ~Green_inn, color = ~Firm_id,
        type = 'scatter', mode = 'line', linetypes = "dashed",
        text = ~ paste('Country: ', Location),
        colors = "Set1", showlegend = FALSE) %>%
   layout(title = list(text = paste("<b>Green innovation</b>",
                                    '<br>',
                                    '<sup>',
                                    "(Log change in Patents between 2006 - 2019 by firm)"), 
                       font = list(size = 14, face = "bold", color = 'red')),
          xaxis = list(title = ""),
          yaxis = list(title = ""))

Multivariate analysis

Unit-level variation

This section checks the differences in financial features and patents granted for individual countries and companies.

  • Significant improvements in GI levels are recognized in the Netherlands (\(\sigma = 2.54\)), Sweden (\(\sigma = 2.47\)), England (\(\sigma = 2.29\)), France (\(\sigma = 2.14\)), Switzerland (\(\sigma = 2.03\)).
# Check for variation across 
  # Countries
data %>% 
  group_by(Location) %>%
  select(Count, Green_inn, Age, Sales, Sales_growth, Leverage, ROA, ROE, Size) %>%
  summarise_all(sd)
## Adding missing grouping variables: `Location`
## # A tibble: 11 x 10
##    Location   Count Green_inn   Age  Sales Sales_growth Leverage   ROA    ROE
##    <chr>      <dbl>     <dbl> <dbl>  <dbl>        <dbl>    <dbl> <dbl>  <dbl>
##  1 AU          4.35     0.819  5.69  5377.       146.      0.258 29.1   34.1 
##  2 CH        832.       2.03  38.8  22381.       701.     15.1   28.2   52.2 
##  3 DE        644.       1.85   4.80  7538.        82.0    11.7   20.7   38.0 
##  4 DK        423.       1.39  37.7   8252.       318.     10.5   21.9   34.6 
##  5 FI        848.       0.289  4.18   153.        19.5     8.01   3.11   5.79
##  6 FR       1166.       2.14   7.99 13992.     10582.     14.3   18.2   48.7 
##  7 GB        465.       2.29   7.25 19287.      3222.     15.8   34.9   72.1 
##  8 IT          6.66     0.267  4.18   289.         3.95    7.97   1.60   4.75
##  9 NL       2140.       2.54   7.03   166.       148.     21.5   26.9  127.  
## 10 NO        131.       0.368  4.18  6036.       107.      0.720 16.8   20.4 
## 11 SE       1748.       2.47   5.57 33879.       323.     22.5   34.6   59.4 
## # ... with 1 more variable: Size <dbl>
  # Firms
data %>% 
  group_by(Firm_id) %>%
  select(Count, Green_inn, Age, Sales, Sales_growth, Leverage, ROA, ROE, Size) %>%
  summarise_all(sd)
## Adding missing grouping variables: `Firm_id`
## # A tibble: 51 x 10
##    Firm_id    Count Green_inn   Age   Sales Sales_growth Leverage   ROA   ROE
##    <chr>      <dbl>     <dbl> <dbl>   <dbl>        <dbl>    <dbl> <dbl> <dbl>
##  1 ACTI-SE  933.        0.256  4.18  9037.        548.      19.2  11.5  42.1 
##  2 ADXN-CH   11.7       0.549  4.63  5339.       1916.       6.49 23.4  31.6 
##  3 AGY-GB   399.        0.462  4.23 16191.          9.75    13.7  23.8  42.6 
##  4 ALCLS-FR  25.6       0.948  4.18 14004.        180.      13.6  25.4  92.3 
##  5 ALK B-DK 311.        0.309  4.18    68.9        19.0      6.12  3.71  5.60
##  6 ALONX-FR   4.27      0.829  3.89  6847.        420.       6.63 25.6  46.7 
##  7 ASGLY-DK 155.        0.200  4.18 11025.         30.0      4.79 17.3  35.0 
##  8 AVCT-GB    2.47      0.777  4.03  1302.         70.5      1.09 13.2  14.3 
##  9 AZN-GB   221.        0.216  4.18  1844.         10.1      7.52  6.62 13.1 
## 10 B8F-DE     0.957     0.139  1.29 10980.         23.3     19.3  27.6  63.9 
## # ... with 41 more rows, and 1 more variable: Size <dbl>

Variable correlation

  • GI is negatively correlated to Firm value: \(\rho\) (Green_inn,   Firm_value) \(= -0.29\)

  • Correlations between GI and ROA, ROE, Leverage ratios, which indicate the profitability of a business, are significantly positive. Examples are \(\rho\) (Green_inn,  ROA) = \(0.22\), \(\rho\) (Green_inn,  ROE) = \(0.13\), \(\rho\) (Green_inn,   Leverage) = \(0.26\).

Correlation matrix

cor_matrix <- data %>% 
   select(-c(Firm_id, Firm, Location)) %>% 
   cor()
   # format to table
data.table(cor_matrix)
##            Year   Firm_value       Count   Green_inn          Age        Sales
##  1:  1.00000000 -0.105646018  0.11723219  0.14581297  0.149944702  0.172443242
##  2: -0.10564602  1.000000000 -0.27979215 -0.29278268 -0.164577049 -0.259800565
##  3:  0.11723219 -0.279792148  1.00000000  0.77150050  0.180585352 -0.010967618
##  4:  0.14581297 -0.292782683  0.77150050  1.00000000  0.257774085  0.112962577
##  5:  0.14994470 -0.164577049  0.18058535  0.25777409  1.000000000 -0.009063086
##  6:  0.17244324 -0.259800565 -0.01096762  0.11296258 -0.009063086  1.000000000
##  7:  0.04713653  0.008801748 -0.03143057 -0.04097266 -0.023182857  0.034673840
##  8:  0.23720245 -0.005070278 -0.01029823  0.01701294 -0.111008314  0.544565944
##  9:  0.21221766 -0.373309935  0.07932868  0.06706982 -0.008412190  0.349021357
## 10:  0.16226898 -0.742051223  0.31546748  0.25805927  0.177957535  0.229578546
## 11:  0.16777008  0.228084339 -0.06115594 -0.05078780 -0.129415776  0.444080885
## 12:  0.05103042 -0.098125686  0.08536687  0.21508263  0.361036554  0.308353610
## 13:  0.05512738  0.101551424 -0.03828480  0.13077143  0.312848227  0.241474111
## 14:  0.16993797  0.028726887 -0.04310543 -0.01275560 -0.183030959  0.495329140
##     Sales_growth Total_assets  Total_debt    Leverage Tshare_equity         ROA
##  1:  0.047136533  0.237202452  0.21221766  0.16226898    0.16777008  0.05103042
##  2:  0.008801748 -0.005070278 -0.37330993 -0.74205122    0.22808434 -0.09812569
##  3: -0.031430573 -0.010298230  0.07932868  0.31546748   -0.06115594  0.08536687
##  4: -0.040972663  0.017012938  0.06706982  0.25805927   -0.05078780  0.21508263
##  5: -0.023182857 -0.111008314 -0.00841219  0.17795753   -0.12941578  0.36103655
##  6:  0.034673840  0.544565944  0.34902136  0.22957855    0.44408089  0.30835361
##  7:  1.000000000  0.104906145  0.14751902  0.01911507    0.07720078  0.03957969
##  8:  0.104906145  1.000000000  0.52334404  0.07335013    0.92802977  0.11496076
##  9:  0.147519016  0.523344038  1.00000000  0.56411743    0.29799881  0.06154660
## 10:  0.019115073  0.073350131  0.56411743  1.00000000   -0.11176635  0.13317624
## 11:  0.077200776  0.928029772  0.29799881 -0.11176635    1.00000000  0.09890877
## 12:  0.039579689  0.114960760  0.06154660  0.13317624    0.09890877  1.00000000
## 13:  0.003916834  0.088876311 -0.01651370 -0.03443105    0.09288148  0.80986246
## 14:  0.072884219  0.624537008  0.33090713  0.06969664    0.55733145 -0.08516402
##              ROE         Size
##  1:  0.055127381  0.169937970
##  2:  0.101551424  0.028726887
##  3: -0.038284796 -0.043105429
##  4:  0.130771426 -0.012755602
##  5:  0.312848227 -0.183030959
##  6:  0.241474111  0.495329140
##  7:  0.003916834  0.072884219
##  8:  0.088876311  0.624537008
##  9: -0.016513703  0.330907125
## 10: -0.034431045  0.069696637
## 11:  0.092881478  0.557331452
## 12:  0.809862465 -0.085164024
## 13:  1.000000000  0.009868411
## 14:  0.009868411  1.000000000

Correlation plot

  • Significant codes: 0 ‘\(***\)’ 0.001 ‘\(**\)’ 0.01 ‘\(*\)’ 0.05 ‘\(.\)’ 0.1 ‘ ’.
corPlot(data %>%
   select(-c(Firm_id, Firm, Location)),
   main = "Correlation",
   cex = 0.6,
   alpha = 0.6,
   stars = TRUE,
   cex.axis = 0.8,
   xlas = 2)


Ranking correlations

  • Firm value is strongly correlated to Leverage ratio. Its association with GI is ranked as third strongest.
# All correlations
  # top 10 couples of variables (by correlation coefficient) at 5% level
data %>%
   select(-c(Firm_id, Firm, Location)) %>%
   corr_cross(max_pvalue = 0.05, top = 10)
## Returning only the top 10. You may override with the 'top' argument

# Correlations with Firm_value
  # top 5 most correlated variables to firm value
data %>% 
  corr_var(var = Firm_value, top = 5)


Statistics tables

Full sample statistics

# Data frame containing statistics information
table2 <- data.frame(describe(data %>%
                                select(-c(Firm_id, Firm, Location))), 
                     fast = TRUE)

  # select information to display
table2 <- table2 %>%
  select(c(n, mean, sd, median, min, max)) %>% 
  round(2) %>%
  format()

# Format the table
table2 <- 
  kable(table2, 
      format = "html",
      size = 10,
      escape = FALSE,
      align = "c",
      caption = "<b>TABLE 2: <i>Administrative Data Statistical Summary",
      col.names = c("<b>Observations", "<b>Mean", "<b>St. deviation", "<b>Median", "<b>Min", "<b>Max")) %>%
  
  # styling
  kable_classic(full_width = F, html_font = "calibri", position = "left") %>%
  
  # footnotes for table 
  footnote(general = "The table reports statistics of all available variables using the full sample.",
  footnote_as_chunk = TRUE)
  
  # save table
  save_kable(x = table2, file = "Table 2.png", zoom = 1.5)

# Print table
table2
TABLE 2: Administrative Data Statistical Summary
Observations Mean St. deviation Median Min Max
Year 646 2012.52 4.01 2013.00 2006.00 2019.00
Firm_value 646 65.02 22.76 68.90 -5.91 99.15
Count 646 531.16 1076.05 40.00 0.00 5616.00
Green_inn 646 4.17 2.19 3.71 0.00 8.63
Age 646 25.04 26.18 17.00 0.00 123.00
Sales 646 11392.30 18847.57 2483.57 0.00 194033.24
Sales_growth 646 348.92 4372.79 7.85 -100.00 94150.00
Total_assets 646 48203.12 73635.93 29205.41 14.92 625685.97
Total_debt 646 5886.85 17178.46 85.82 0.00 183618.00
Leverage 646 10.51 15.21 2.74 0.00 77.58
Tshare_equity 646 31255.40 53897.21 14832.46 -657.77 470117.00
ROA 646 -17.61 28.46 -14.40 -142.57 71.20
ROE 646 -27.82 59.60 -21.09 -415.39 290.15
Size 646 9.32 2.34 10.28 2.70 13.35
Note: The table reports statistics of all available variables using the full sample.

Subgroup statistics

Create subgroups

young_group <- data %>% filter(Age <= 50)
old_group <- data %>% filter(Age > 50)

Dataframe containing means and standard errors

# Create a data frame
table3 <- data.frame(NULL)
desc_var <- c("Firm_value", "Count", "Green_inn",
               "Age", "Sales", "Sales_growth", "Total_assets",
               "Total_debt", "Leverage", "Tshare_equity", "ROA",
               "ROE", "Size", "")

# For-loop to assign values into data frame
  # Full sample
for(i in 1:14){
  
    # means of individual variables
  mean <- mean(data[[desc_var[i]]])
  
    # standard errors of individual variables
  sd <- sd(data[[desc_var[i]]])
  
    # number of observations
  obs <- nrow(data)
  
    # assigning values to the table
  table3[i, 1] <- mean
  table3[i, 2] <- sd
  table3[14, 1] <- obs
  }

  # Young group
for (i in 1:14){
  
    # mean of the column 
  mean <- mean(young_group[[desc_var[i]]])
    
    # standard errors of individual covariates
  sd <- sd(young_group[[desc_var[i]]])
  
    # number of observations
  obs <- nrow(young_group)
    
    # assigning values to the table
  table3[i, 3] <- mean
  table3[i, 4] <- sd
  table3[14, 3] <- obs
  }

  # Old group
for (i in 1:14){
    
    # mean of the column 
  mean <- mean(old_group[[desc_var[i]]])
    
    # standard errors of individual covariates
  sd <- sd(old_group[[desc_var[i]]])
  
    # number of observations
  obs <- nrow(old_group)
    
   # assigning values to the table
  table3[i, 5] <- mean 
  table3[i, 6] <- sd
  table3[14, 5] <- obs
  }

# Formatting

  # renaming columns
colnames(table3) <- c(
  "<b>All", "", "<b>Young firms", "", "<b>Old firms")

  # renaming rows
rownames(table3) <- c("Firm value", "Patent count", "Green innovation", "Firm age",
                      "Annual sales", "Annual sales growth", "Total assets", "Total debt", 
                      "Leverage ratio", "Total Shareholders' Equity", "Return on Asset", 
                      "Return on Equity", "Firm size", "Number of observations")

  # round values
table3 <- round(table3, 3)

Translate into table

# Standard errors & means

   # print standard errors below the means
for (i in 1:13){
  table3[i, 1] <- paste(
    table3[i, 1], '<br>', '<sup>', "(", table3[i, 2],  ")", sep = ""
    )
  table3[i, 3] <- paste(
    table3[i, 3], '<br>', '<sup>', "(", table3[i, 4], ")", sep = ""
    )
  table3[i, 5] <- paste(
    table3[i, 5], '<br>', '<sup>', "(", table3[i, 6], ")", sep = ""
    )
}
table3[, 1] <- linebreak(table3[, 1])
table3[, 2] <- linebreak(table3[, 2])
table3[, 3] <- linebreak(table3[, 3])

  # remove columns of the standard errors 
table3 <- table3[-c(2,4,6)]
table3 
##                                                   <b>All
## Firm value                       65.017<br><sup>(22.755)
## Patent count                  531.158<br><sup>(1076.049)
## Green innovation                   4.169<br><sup>(2.195)
## Firm age                         25.037<br><sup>(26.176)
## Annual sales                11392.295<br><sup>(18847.57)
## Annual sales growth            348.92<br><sup>(4372.788)
## Total assets               48203.119<br><sup>(73635.934)
## Total debt                  5886.846<br><sup>(17178.461)
## Leverage ratio                    10.511<br><sup>(15.21)
## Total Shareholders' Equity 31255.396<br><sup>(53897.214)
## Return on Asset                 -17.608<br><sup>(28.457)
## Return on Equity                -27.821<br><sup>(59.596)
## Firm size                            9.32<br><sup>(2.34)
## Number of observations                               646
##                                           <b>Young firms
## Firm value                        66.215<br><sup>(23.24)
## Patent count                  484.849<br><sup>(1037.999)
## Green innovation                   4.011<br><sup>(2.182)
## Firm age                           16.28<br><sup>(6.651)
## Annual sales               11732.683<br><sup>(19177.124)
## Annual sales growth           390.935<br><sup>(4629.551)
## Total assets                52517.503<br><sup>(76452.13)
## Total debt                  6176.251<br><sup>(17988.907)
## Leverage ratio                    9.662<br><sup>(15.368)
## Total Shareholders' Equity 34515.071<br><sup>(56144.725)
## Return on Asset                 -21.184<br><sup>(27.964)
## Return on Equity                -34.235<br><sup>(59.567)
## Firm size                          9.508<br><sup>(2.336)
## Number of observations                               576
##                                            <b>Old firms
## Firm value                      55.159<br><sup>(15.126)
## Patent count                 912.214<br><sup>(1296.059)
## Green innovation                   5.467<br><sup>(1.85)
## Firm age                           97.1<br><sup>(11.31)
## Annual sales               8591.388<br><sup>(15717.086)
## Annual sales growth              3.196<br><sup>(13.267)
## Total assets               12701.902<br><sup>(23335.27)
## Total debt                  3505.457<br><sup>(7444.013)
## Leverage ratio                    17.5<br><sup>(11.778)
## Total Shareholders' Equity  4432.931<br><sup>(8211.853)
## Return on Asset                  11.819<br><sup>(8.233)
## Return on Equity                24.959<br><sup>(21.513)
## Firm size                         7.767<br><sup>(1.723)
## Number of observations                               70
# Formatting & Saving

  # add headlines
table3 <- table3 %>%
    kable(
      format = "html",
      size = 10,
      escape = FALSE,
      caption = "<b>TABLE 3: <i>Descriptive Statistics",
      align = "c") %>%
    kable_classic(full_width = F, html_font = "calibri", position = "left") %>%
    
   # notify variable types
    pack_rows("Dependent variable", 1, 1) %>%
    pack_rows("Explanatory variable", 2, 3) %>%
    pack_rows("Firm-specific features", 4, 14) %>%
  
   # footnotes for table 
    footnote(general = "Table columns report means and standard deviations (shown in parentheses) of young and old firm samples compared to the full sample.",
    footnote_as_chunk = TRUE)
  
   # save table
    save_kable(x = table3, file = "Table 3.png", zoom = 1.5)

# Print table
table3
TABLE 3: Descriptive Statistics
All Young firms Old firms
Dependent variable
Firm value 65.017
(22.755)
66.215
(23.24)
55.159
(15.126)
Explanatory variable
Patent count 531.158
(1076.049)
484.849
(1037.999)
912.214
(1296.059)
Green innovation 4.169
(2.195)
4.011
(2.182)
5.467
(1.85)
Firm-specific features
Firm age 25.037
(26.176)
16.28
(6.651)
97.1
(11.31)
Annual sales 11392.295
(18847.57)
11732.683
(19177.124)
8591.388
(15717.086)
Annual sales growth 348.92
(4372.788)
390.935
(4629.551)
3.196
(13.267)
Total assets 48203.119
(73635.934)
52517.503
(76452.13)
12701.902
(23335.27)
Total debt 5886.846
(17178.461)
6176.251
(17988.907)
3505.457
(7444.013)
Leverage ratio 10.511
(15.21)
9.662
(15.368)
17.5
(11.778)
Total Shareholders’ Equity 31255.396
(53897.214)
34515.071
(56144.725)
4432.931
(8211.853)
Return on Asset -17.608
(28.457)
-21.184
(27.964)
11.819
(8.233)
Return on Equity -27.821
(59.596)
-34.235
(59.567)
24.959
(21.513)
Firm size 9.32
(2.34)
9.508
(2.336)
7.767
(1.723)
Number of observations 646 576 70
Note: Table columns report means and standard deviations (shown in parentheses) of young and old firm samples compared to the full sample.

Section IV: Empirical Approach

Hypotheses

This analysis aims at testing 3 hypotheses:

  • Hypothesis 1: Green innovation (GI) has a negative influence on firm value in the short-run.

  • Hypothesis 2: In the long run, GI can generate favorable outcomes to businesses.

  • Hypothesis 3: GI affects the economic performance of younger and older firms differently.


Significance level

This analysis chooses a general significance level of 10%. The significance of a variable can also be found by dividing its estimate by its standard error. Any coefficient that has a significance level at or below 10% is considered significant.


Methodology

Both the Pooled OLS and Fixed Effects Regressions are employed in this section, with the emphasize being placed on the later estimator.

This analysis initially employs The Pooled OLS (Pooled Ordinary Least Squares), which can be simply defined as a Linear Regression estimator applied to Panel Data, to investigate the impact of GI on firm value. This metric works under two strict assumptions that, firstly, time-constant attributes are present and unbiased and, secondly, consistent estimate of variables are expected. However, this is unlikely to be the case due to high possibility of Omitted Variable Bias (OVB). For this reason, Fixed Effects Regression is later introduced to deal with the issue.

I deploy two-way effects method for the Fixed Effects Regression as it can control for both time-specific and entity-specific effects. Unlike the Pooled OLS, this estimator can account for unobservable time-invariant factors, a key source of potential OVB. Examples for this type of bias are management quality, human resources’ ability. By allowing constants to vary within individual groups of research units, i.e., all observations for Roche Holding AG from 2006 to 2019, the effects of these attributes are mathematically removed and the coefficients of the variables of interested remain unaffected. Consequently, a less complicated equation is constructed and and reliable results are obtained.

Bias can also come from observable factors, or heterogeneity among firms, i.e., sizes, growth rates, and profitability. These biases are can be easily eliminated by directly adding them into the regression model. As a results, Fixed Effect Regression method can be more efficient in dealing with OVB.

The regression procedure is described as follows. I perform the regressions, firstly, on the administrative data, and secondly add one-year and two-year lagged and leading values on the variables of interest to the models. The latter step helps to statistically test the second hypothesis regarding the long-term effects of GI. Similar procedures are applied to two age subgroups to determine the responsiveness of young and older companies to GI. Next, I investigate the deviations of the estimated coefficients of financial features and standard errors between models of different age groups to draw conclusions for the third hypothesis. If the coefficients are significantly different, there should be separate analyses for each group. Section V closes with robustness check methods regarding effectiveness of log transformation and Fixed Effects regression in handling different types of biases. Results are compared and explained in the next section. I will elaborate on the key findings in Section VII. Results of the word file.


Model construction

The baseline model includes a dependent variable, Firm_value, and a regressor, Green_inn, while there are three extended models, which are simply the baseline model including multiple control variables.

While running the Pooled OLS Regression, I use Anova() function to determine which extended model performs the best, or in other words, which variables should be included in the extended equation. This model will later be used as the main extended model for further analysis using Fixed Effects Regression.


Robustness check

The third part of this section presents 2 robustness checks on:

  • Data skewness: Log transformation might not be able to fully eliminate biases. Therefore, it is important to inspect if excluding all abnormalities would affect the results. This check is conducted on cleaned data, which is original dataset excluding abnormalities (observations with over 4000 patents granted per year).

  • Bias-control efficiency of Fixed Effects Regression: Although Fixed Effects Regression can handle biases caused by both observable and unobservable variables, there is no certain indicator of how good this method can be. Therefore, I investigate how the coefficients of variables of interest change when several time-constant variables are included/excluded off the equation. Previous regression results are robust if the estimates are insensitive to the model specification, otherwise unreliable.


List of models

** Create a table**

# Create a data frame containing indicators & model specifications
table4 <- data.table(
  
  `<b>Models` = c("Baseline", "Extended", "Baseline", "Extended", ""),
  
  `<b>Short effects` = c("lm_base",
                         "lm_ext1, lm_ext2, lm_ext3",
                         "fix_base, young_base, old_base",
                         "fix_ext, young_ex, old_ext"),
  
  `<b>Long effects` = c("",
                        "lag1_base, lag2_base, lead1_base, lead2_base, lag1_ybase, lag2_ybase",
                        "lead1_ybase, lead2_ybase, lag1_obase, lag2_obase, lead1_obase, lead2_obase",
                        "lag1_ext, lag_2ext, lead1_ext, lead_2ext, lag1_yext, lag2_yext", 
                        "lead1_yext, lead2_yext, lag1_oext, lag2_oext, lead1_oext, lead2_oext"))

Translate into table

# Format table 
kable(
  x = table4, 
  format = "html",
  size = 10,
  escape = FALSE,
  caption = "<b>TABLE 4: <i>List of Regression Models",
  align = "cll") %>%
  kable_classic(full_width = F, html_font = "calibri", position = "left") %>%
    
  # notify variable rows
  pack_rows("Pooled OLS", 1, 2, italic = TRUE) %>%
  pack_rows("Fixed Effects", 3, 5, italic = TRUE) %>%
  
  # footnotes for table 
  footnote(general = "lm_ext2 is selected as the main extended model in Fixed Effects Regression.",
  number = c(
    "'base' and 'ext' indicate baseline or extended models",
    "'lag' and 'lead' indicate models with lagged or leading year(s)",
    " 'y' and 'o' indicate age subgroups", 
    "1/2: 1 or 2 years"))
TABLE 4: List of Regression Models
Models Short effects Long effects
Pooled OLS
Baseline lm_base
Extended lm_ext1, lm_ext2, lm_ext3 lag1_base, lag2_base, lead1_base, lead2_base, lag1_ybase, lag2_ybase
Fixed Effects
Baseline fix_base, young_base, old_base lead1_ybase, lead2_ybase, lag1_obase, lag2_obase, lead1_obase, lead2_obase
Extended fix_ext, young_ex, old_ext lag1_ext, lag_2ext, lead1_ext, lead_2ext, lag1_yext, lag2_yext
lm_base lead1_yext, lead2_yext, lag1_oext, lag2_oext, lead1_oext, lead2_oext
Note:
lm_ext2 is selected as the main extended model in Fixed Effects Regression.
1 ‘base’ and ‘ext’ indicate baseline or extended models
2 ‘lag’ and ‘lead’ indicate models with lagged or leading year(s)
3 ‘y’ and ‘o’ indicate age subgroups
4 1/2: 1 or 2 years
  # save table
  save_kable(x = "table4", file = "Table 4.png", zoom = 1.5)
  
# print table
table4
##    <b>Models               <b>Short effects
## 1:  Baseline                        lm_base
## 2:  Extended      lm_ext1, lm_ext2, lm_ext3
## 3:  Baseline fix_base, young_base, old_base
## 4:  Extended     fix_ext, young_ex, old_ext
## 5:                                  lm_base
##                                                               <b>Long effects
## 1:                                                                           
## 2:       lag1_base, lag2_base, lead1_base, lead2_base, lag1_ybase, lag2_ybase
## 3: lead1_ybase, lead2_ybase, lag1_obase, lag2_obase, lead1_obase, lead2_obase
## 4:             lag1_ext, lag_2ext, lead1_ext, lead_2ext, lag1_yext, lag2_yext
## 5:       lead1_yext, lead2_yext, lag1_oext, lag2_oext, lead1_oext, lead2_oext

Pooled OLS Regression

Baseline model

  • GI tend to create negative values to the business: Firm value decreases by 0.03% per 1% increase in the number of patents.
# Model
lm_base <- lm(formula = Firm_value ~ Green_inn,
              data = data)
summary(lm_base)
## 
## Call:
## lm(formula = Firm_value ~ Green_inn, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -81.479 -12.770   4.095  16.736  41.617 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  77.6739     1.8404  42.205  < 2e-16 ***
## Green_inn    -3.0359     0.3907  -7.771  3.1e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21.77 on 644 degrees of freedom
## Multiple R-squared:  0.08572,    Adjusted R-squared:  0.0843 
## F-statistic: 60.38 on 1 and 644 DF,  p-value: 3.098e-14
# Visualization
p <- ggplot(data, aes(Green_inn, Firm_value)) +
   geom_point() 
  
  # linear model
p1 <- p + geom_smooth() + labs(x = "", y = "Firm value")

  # linear model smoothing
p2 <- p + geom_smooth(method = "lm") +
   labs(x = "", y = "")

  # display plot
p <- ggarrange(p1, p2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
annotate_figure(p,  
                top = text_grob("Green innovation & Firm value",
                                color = "red", face = "bold", size = 12),
                bottom = text_grob("GI index", size = 10))


Extended models

  • GI can worsen economic performance of firms. Per 1% increase in GI, firm value can decrease by approximately 1.4%, ceteris paribus.
# Adding control variables
   # Base model + all control variables
lm_ext1 <- lm(formula = Firm_value ~ Green_inn + Location + Age +
                  Sales_growth + Leverage + Year + ROA + ROE + Size, 
                data = data)

   # extended model, - ROE
lm_ext2 <- lm(formula = Firm_value ~ Green_inn + Location + Age +
                 Sales_growth +  Leverage + Year + ROA + Size, 
               data = data)

   # extended model, - ROA
lm_ext3 <- lm(formula = Firm_value ~ Green_inn + Location + Age +
                  Sales_growth + Leverage + Year + ROE + Size, 
               data = data)

# Result summary
modelsummary(list(lm_ext1, lm_ext2, lm_ext3))
Model 1 Model 2 Model 3
(Intercept) 52.658 6.084 24.302
(296.810) (301.864) (299.037)
Green_inn -1.348 -1.347 -1.416
(0.307) (0.312) (0.309)
LocationCH -11.927 -12.212 -12.399
(3.467) (3.527) (3.491)
LocationDE -4.586 -4.683 -4.470
(3.493) (3.554) (3.520)
LocationDK -7.168 -7.855 -7.725
(3.655) (3.716) (3.680)
LocationFI 4.608 3.358 3.229
(5.627) (5.720) (5.656)
LocationFR -15.603 -16.495 -16.075
(3.442) (3.498) (3.467)
LocationGB -12.355 -11.612 -11.867
(3.367) (3.423) (3.390)
LocationIT -1.485 -1.853 -2.369
(5.301) (5.394) (5.336)
LocationNL -9.303 -11.946 -10.610
(4.639) (4.688) (4.659)
LocationNO 2.130 1.260 1.093
(5.027) (5.112) (5.057)
LocationSE -7.358 -7.825 -7.138
(3.713) (3.777) (3.742)
Age -0.068 -0.053 -0.069
(0.029) (0.030) (0.030)
Sales_growth 0.000 0.000 0.000
(0.000) (0.000) (0.000)
Leverage -0.981 -1.033 -1.016
(0.042) (0.042) (0.041)
Year 0.015 0.038 0.030
(0.148) (0.150) (0.149)
ROA -0.123 0.022
(0.037) (0.022)
ROE 0.084 0.037
(0.017) (0.010)
Size 0.920 1.064 1.015
(0.308) (0.312) (0.309)
Num.Obs. 646 646 646
R2 0.622 0.608 0.616
R2 Adj. 0.611 0.598 0.605
AIC 5280.5 5302.1 5289.8
BIC 5369.9 5387.0 5374.7
Log.Lik. -2620.266 -2632.030 -2625.878
F 57.389 57.364 59.177
RMSE 14.18 14.43 14.30

Model comparison

Anova test

This is to test whether or not it is necessary to have both ROA and ROE into the models.

If the resulting p-value is sufficiently low (\(p\_value < 0.05\)), the more complex model is significantly better than the simpler model, and thus favor the more complex model. If the p-value is not sufficiently low (\(p\_value > 0.05\)), the simpler model is favored.

The anova() test shows that the baseline model including all variables except for ROE, lm_ext2, performs the best, meaning that including ROE does not necessarily add explanatory power to the regression models.

\[Firm\_value = \beta_0 + \beta_1 * Green\_inn + \epsilon\] \[Firm\_value = \beta_0 + \beta_1 * Green\_inn + \sum \beta_n * Controls + \epsilon\]

Therefore, this analysis will mainly use the simple baseline model and the second extended model for further research.

anova(lm_ext1, lm_ext2, lm_ext3)
## Analysis of Variance Table
## 
## Model 1: Firm_value ~ Green_inn + Location + Age + Sales_growth + Leverage + 
##     Year + ROA + ROE + Size
## Model 2: Firm_value ~ Green_inn + Location + Age + Sales_growth + Leverage + 
##     Year + ROA + Size
## Model 3: Firm_value ~ Green_inn + Location + Age + Sales_growth + Leverage + 
##     Year + ROE + Size
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1    627 126147                                  
## 2    628 130826 -1   -4678.9 23.256 1.782e-06 ***
## 3    628 128358  0    2468.1                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Diagnostic plots

Diagnostic plots help to visually check if mathematical assumptions have been violated (homogeneity of variance) or whether the data contains outliers (Schweinberger, 2020).

There are three main residual plots, with each serving its own purposes:

  • Residuals: The 3 plots in the first panel demonstrate the differences between the observed and predicted values produced by the regression equations. However, it is impossible to compare these residuals to those of other models as these residuals not standardized.

  • Standardized Residuals: The next 3 plots in the middle panel show the normalized residuals, which are computed by dividing the residuals by their standard deviation. Then, the normalized residuals can be plotted against the observed values.

    • In this way, not only are standardized residuals obtained, but the values of the residuals are transformed into z-values, and one can use the z-distribution to find problematic data points.

    • Rules of thumb:

      (1) Points with values higher than 3.29 should be removed from the data.

      (2) If more than 1% of the data points have values higher than 2.58, then the error rate of our model is too high.

      (3) If more than 5% of the data points have values greater than 1.96, then the error rate of our model is too high.

  • Studentized residuals: or also called adjusted predicted values (bottom panel). Although they also indicate the difference between the observed and predicted values, they are calculated in a way that allows us to identify influential data points.

Two potentially problematic data points (the top-most and bottom-most point) are detected in the plots. These two points are observably different from the other data points and may therefore be outliers. We will test later if these points need to be removed.

# Baseline model
  # data frame containing Residuals
df_base <- data.frame(id = 1:length(resid(lm_base)),
                      residuals = resid(lm_base),
                      standard = rstandard(lm_base),
                      studend = rstudent(lm_base))

  # generate plots
p1 <- ggplot(df_base, aes(x = id, y = residuals)) + 
  geom_point() +
  labs(subtitle = "Baseline model", 
       x = "", y = "Residuals") +
    theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()) +
  theme(plot.subtitle = element_text(size = 9, hjust = 0.5, face = "italic"))

p2 <- ggplot(df_base, aes(x = id, y = standard)) + 
  geom_point() +
  labs(x = "", y = "Standardized Residuals") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank())

p3 <- ggplot(df_base, aes(x = id, y = studend)) + 
  geom_point() +
  labs(x = "", y = "Studentized Residuals") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank())


# Extended model 2
 # data frame containing Residuals
df_extend <- data.frame(id = 1:length(resid(lm_ext2)),
                        residuals = resid(lm_ext2),
                        standard = rstandard(lm_ext2),
                        studend = rstudent(lm_ext2))

  # generate plots
p4 <- ggplot(df_extend, aes(x = id, y = residuals)) + 
  geom_point() +
  labs(subtitle = "Extended baseline", 
       x = "", y = "" ) +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()) +
  theme(plot.subtitle = element_text(size = 9, hjust = 0.5, face = "italic"))

p5 <- ggplot(df_extend, aes(x = id, y = standard)) + 
  geom_point() +
  labs(x = "", y = "") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()) 

p6 <- ggplot(df_extend, aes(x = id, y = studend)) + 
  geom_point() +
  labs(x = "", y = "") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank())

# Display plots
p <- ggarrange(p1, p4, p2, p5, p3, p6, nrow = 3, ncol = 2)
annotate_figure(p,  
                top = text_grob("Model performance",
                color = "red", face = "bold", size = 12),
                bottom = text_grob("Index", size = 10))


Diagnostic plots (cont.) (Schweinberger, 2020)

  • Residuals vs Fitted: (upper left panel) detects outliers or determines the correlation between residuals and predicted values. This graph can help to remove data points that are too influential (outliers).

  • Normal Q-Q: (upper right panel) shows if the residuals are normally distributed (following a normal distribution). Observations 381, 243, 24 lie farther away from the dashed line, thus should be removed.

  • Scale-Location: tests for homoscedasticity, or whether or not the variance of the residuals remains constant and does not correlate with any independent variable. If there is a trend in the line, we are dealing with heteroscedasticity, that is, a correlation between independent variables and the residuals, which is very problematic for regressions.

This strengthens my agrument that the Fixed Effect Regression should be preferred to deal with time-inconsistant variables.

  • Residuals vs Leverage: shows Cook’s distance value which is used as a measure of how strongly a data point affects the accuracy of the regression. If greater than 1, the data points are influentially problematic and thus should be dropped. Clearly, data points 381, 34, 152 disproportionately affect the regression.
# Extra diagnostic plots
  # simple model
autoplot(lm_base) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 

  # extended model
autoplot(lm_ext2) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 


Fixed Effects Regression

Methodology

The plm() command is used the in Fixed Effects Regression instead of the lm() since it is more computationally efficient, especially as the models become more complex.

I firstly investigate the short term effects of GI on the economic performance of the researched units using the full sample. I then test the hypothesis that GI generates economic gains in the long run by using lags and leads of 1 and 2 years. Standard errors are shown in robust values. The same procedure is applied to subgroup samples to test the third hypothesis regarding the effects of GI on firms of different age groups.


Full sample

I. Short run effects

  • In the baseline model, the coefficient on GI is positive, \(5.6195\) (\(s.e. = 1.7137\)), and significant at \(1\)%. However, it becomes insignificant and decreases to \(1.1263e+00\) (\(s.e. = 1.3012e+00\)) when adding control variables.

  • Robust standard errors do not change much, implying that such a change is likely to be caused by explanatory power of control variables rather than random factors.

# Baseline model
fix_base <- plm(formula = Firm_value ~ Green_inn, 
               data = data, 
               index = c("Firm_id", "Year"), 
               model = "within",
               effect = "twoways")


# Extended model
fix_ext <- plm(formula = Firm_value ~ Green_inn + Location + Age +
                 Sales_growth + Leverage + Year + ROA + Size,
               data = data, 
               index = c("Firm_id", "Year"), 
               model = "within",
               effect = "twoways")

# Print results using robust standard errors
coeftest(fix_base, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##           Estimate Std. Error t value Pr(>|t|)   
## Green_inn   5.6195     1.7137  3.2791 0.001104 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(fix_ext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                 Estimate  Std. Error  t value  Pr(>|t|)    
## Green_inn     1.1263e+00  1.3012e+00   0.8655 0.3870968    
## Sales_growth -4.4859e-05  8.6038e-05  -0.5214 0.6022972    
## Leverage     -9.5759e-01  7.2035e-02 -13.2934 < 2.2e-16 ***
## ROA           3.3095e-02  2.9773e-02   1.1116 0.2667789    
## Size          6.0125e+00  1.6474e+00   3.6497 0.0002863 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

II. Long run effects

  • Regardless of the influence of other factors, GI can create positive outcomes for companies in the near future. It can increase firm value by 4.6% per 1% increase in patent ownership after one year of technological development, as shown in the baseline equations \(lag1\_ base\) (significant at 1% ) and \(lead1 \_base\) (significant at 0.1%).

  • Nevertheless, when adding control variables, no significant coefficients are detected in both 1 and 2-year lag models. For example, GI estimate in 1-year lag model shrinks to \(1.8744e+00\) while standard errors do not vary, \(s.e. = 1.5696\) and \(s.e. = 1.7279\) respectively.

Lagged years

# Baseline
  # 1 year lag
lag1_base <- plm(formula = Firm_value ~  lag(Green_inn, 1),
                 data = data,
                 index = c("Firm_id", "Year"),
                 effect = "twoways",
                 model = "within",
                 na.action = na.exclude)

  # 2 years lag
lag2_base <- plm(formula = Firm_value ~ lag(Green_inn, 2),
                 data = data,
                 index = c("Firm_id", "Year"),
                 effect = "twoways",
                 model = "within",
                 na.action = na.exclude)


# Extended model
  # 1 year lag
lag1_ext <- plm(formula = Firm_value ~  lag(Green_inn, 1) + Location + Age +
                 Sales_growth + Leverage + Year + ROA + Size,
                 data = data,
                 index = c("Firm_id", "Year"),
                 effect = "twoways",
                 model = "within",
                 na.action = na.exclude)

  # 2 years lag
lag2_ext <- plm(formula = Firm_value ~ lag(Green_inn, 2) + Location + Age +
                 Sales_growth + Leverage + Year + ROA + Size,
                data = data,
                index = c("Firm_id", "Year"),
                effect = "twoways",
                 model = "within",
                 na.action = na.exclude)

# Print results using robust standard errors
coeftest(lag1_base, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                   Estimate Std. Error t value Pr(>|t|)   
## lag(Green_inn, 1)   4.6127     1.7279  2.6695 0.007834 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lag1_ext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                      Estimate  Std. Error  t value Pr(>|t|)    
## lag(Green_inn, 1)  1.8744e+00  1.1942e+00   1.5696  0.11713    
## Sales_growth      -5.1034e-05  9.7160e-05  -0.5253  0.59963    
## Leverage          -9.2992e-01  8.5828e-02 -10.8347  < 2e-16 ***
## ROA                7.0163e-02  3.4531e-02   2.0319  0.04268 *  
## Size               4.1242e+00  2.0953e+00   1.9683  0.04957 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lag2_base, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                   Estimate Std. Error t value Pr(>|t|)  
## lag(Green_inn, 2)   4.7086     1.9057  2.4708  0.01384 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lag2_ext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                      Estimate  Std. Error  t value Pr(>|t|)    
## lag(Green_inn, 2)  2.2302e+00  1.4241e+00   1.5661  0.11801    
## Sales_growth      -4.6299e-05  9.7073e-05  -0.4769  0.63362    
## Leverage          -9.2321e-01  8.4005e-02 -10.9899  < 2e-16 ***
## ROA                6.1532e-02  3.4658e-02   1.7754  0.07649 .  
## Size               4.3855e+00  2.2872e+00   1.9174  0.05580 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Years forwards

  • Similar results are found in the baseline models using leading value of firms as dependent variable.

  • However, the extended lead models produce different outcomes from those of lags. Instead of showing no effects, they evidence that today’s investments can bring marginal values in the future.

  • Models using 1 and 2-year leads produce relatively similar results: significant and positive coefficients. This strongly suggests that the increase in the firm value tomorrow can be explained by the growing rate of today’s GI.

  • For example, the baseline models, \(lead1\_base\) and \(lead2\_base\), create estimates of \(4.6127\) (\(s.e. = 1.7279\)) and \(4.7086\) (\(s.e. = 1.9057\)). Comparably, estimates of extended models, \(lead1\_ext\) and \(lead2\_ext\), are \(3.4333e+00\) (\(s.e. = 1.5799e+00\)) and 4.7633e+00 (\(s.e. = 1.8656e+00\)) respectively.

\(\implies\) In short, these results support to the second hypothesis that the long-run performance of a biotechnology company is positively constructed by its GI decisions.

# Baseline model
  # 1 year leading 
lead1_base <- plm(lead(Firm_value, 1) ~ Green_inn,
                   data = data,
                   index = c("Firm_id", "Year"),
                   effect = "twoways", model = "within",
                   na.action = na.exclude)

  # 2 years leading 
lead2_base <- plm(lead(Firm_value, 2) ~ Green_inn,
                   data = data, index = c("Firm_id", "Year"),
                   effect = "twoways", model = "within",
                   na.action = na.exclude)

# Extended model
  # 1 year leading 
lead1_ext <- plm(lead(Firm_value, 1) ~ Green_inn + Location + Age +
                   Sales_growth + Leverage + Year + ROA + Size,
                   data = data, index = c("Firm_id", "Year"),
                   effect = "twoways", model = "within",
                   na.action = na.exclude)

  # 2 years leading 
lead2_ext <- plm(lead(Firm_value, 2) ~ Green_inn + Location + Age +
                   Sales_growth + Leverage + Year + ROA + Size,
                   data = data, index = c("Firm_id", "Year"),
                   effect = "twoways", model = "within",
                   na.action = na.exclude)

# Print results using robust standard errors
coeftest(lead1_base, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##           Estimate Std. Error t value Pr(>|t|)   
## Green_inn   4.6127     1.7279  2.6695 0.007834 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lead1_ext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                 Estimate  Std. Error t value  Pr(>|t|)    
## Green_inn     3.4333e+00  1.5799e+00  2.1731   0.03023 *  
## Sales_growth  2.8677e-05  3.8012e-05  0.7544   0.45094    
## Leverage     -4.8823e-01  1.0873e-01 -4.4904 8.786e-06 ***
## ROA           5.9007e-02  3.9735e-02  1.4850   0.13815    
## Size         -1.1442e+00  1.5433e+00 -0.7414   0.45881    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lead2_base, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##           Estimate Std. Error t value Pr(>|t|)  
## Green_inn   4.7086     1.9057  2.4708  0.01384 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lead2_ext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                 Estimate  Std. Error t value  Pr(>|t|)    
## Green_inn     4.7633e+00  1.8656e+00  2.5533 0.0109911 *  
## Sales_growth  8.0165e-06  3.6644e-05  0.2188 0.8269287    
## Leverage     -3.2287e-01  9.2043e-02 -3.5078 0.0004959 ***
## ROA           1.4619e-02  4.3599e-02  0.3353 0.7375535    
## Size         -2.9217e+00  1.4892e+00 -1.9620 0.0503618 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Sub-conclusions 1:

  • In the short run, no effects are detected for the whole sample. GI neither increases nor damages firm value.

  • In the long-run, GI possibly generates favorable outcomes to those who undertake the transition.


Age subgroups

Firms are segregated into two subsets according to their ages, named as \(young\_group\) and \(old\_group\). Regressions are performed with the Fixed Effects estimator only. However, it bears stressing that there is a large difference in the number of observations between the 2 samples. This follows that results should be interpreted with care.

I. Short run effects

Young companies

  • A significant coefficient on GI is found in the baseline model using young group sample, which indicates that leveraging sustainable technology can have an immediate influence on the value of young enterprises. On average, per \(1\)% increase in GI, a young bio-tech company can achieve a \(5.6853\)% increase in value in the same year.
# Baseline model
young_base <- plm(formula = Firm_value ~ Green_inn, 
               data = young_group, 
               index = c("Firm_id", "Year"), 
               model = "within",
               effect = "twoways")

# Extended model
young_ext <- plm(formula = Firm_value ~ Green_inn + Location + Age +
                   Sales_growth + Leverage + Year + ROA + Size,
               data = young_group, 
               index = c("Firm_id", "Year"), 
               model = "within",
               effect = "twoways")

# Print results using robust standard errors
coeftest(young_base, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##           Estimate Std. Error t value Pr(>|t|)   
## Green_inn   5.6853     1.7842  3.1864 0.001528 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(young_ext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                 Estimate  Std. Error  t value  Pr(>|t|)    
## Green_inn     1.1237e+00  1.3306e+00   0.8445 0.3987775    
## Sales_growth -4.4052e-05  8.5592e-05  -0.5147 0.6070005    
## Leverage     -9.6627e-01  7.8295e-02 -12.3415 < 2.2e-16 ***
## ROA           3.1695e-02  2.9857e-02   1.0616 0.2889265    
## Size          5.9404e+00  1.6570e+00   3.5850 0.0003694 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Old companies

  • Older firms are not affected by their spending in GI shortly.

  • Their financial performance in a particular year can be heavily dependent on other finance-related factors, including leverage ratio, size, as evidenced by significant coefficients (\(-9.6627e-01\), \(5.9404e+00\) respectively). This relationship has been confirmed with their strong correlation, as demonstrated in Section III. Data Descriptives.

# Baseline model
old_base <- plm(formula = Firm_value ~ Green_inn, 
               data = old_group, 
               index = c("Firm_id", "Year"), 
               model = "within",
               effect = "twoways")

# Extended model
old_ext <- plm(formula = Firm_value ~ Green_inn + Location + Age +
                   Sales_growth + Leverage + Year + ROA + Size,
               data = old_group, 
               index = c("Firm_id", "Year"), 
               model = "within",
               effect = "twoways")

# Print results using robust standard errors
coeftest(old_base, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##           Estimate Std. Error t value Pr(>|t|)
## Green_inn   2.1067     2.6857  0.7844   0.4364
coeftest(old_ext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                Estimate Std. Error  t value  Pr(>|t|)    
## Green_inn     0.4025810  2.7746274   0.1451  0.885257    
## Sales_growth -0.0066424  0.0368536  -0.1802  0.857742    
## Leverage     -0.8852449  0.0621912 -14.2342 < 2.2e-16 ***
## ROA           0.1702112  0.0735800   2.3133  0.025131 *  
## Size         10.0239107  3.1349865   3.1974  0.002481 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

II. Long run effects

1. Lagged years

Young companies

  • For 1-year lag, the coefficient is positive (\(4.7720\), \(s.e. = 1.7981\)) and significant at 1% but becomes insignificant as control variables are included (\(1.9434e+00\), \(s.e. = 1.9434e+00\)).

  • For 2-year lag, GI estimate produced with the baseline equation is still statistically significant (\(4.9344\)), but at 5%. The extended equation produces a positive but insignificant estimate of \(2.2639e+00\) (\(s.e. = 1.9776\)).

\(\implies\) In the long-run, GI might not matter to young companies.

# Baseline
  # 1 year lag
lag1_ybase <- plm(formula = Firm_value ~  lag(Green_inn, 1),
                 data = subset(data, Age <=50),
                 index = c("Firm_id", "Year"),
                 effect = "twoways",
                 model = "within",
                 na.action = na.exclude)

  # 2 years lag
lag2_ybase <- plm(formula = Firm_value ~ lag(Green_inn, 2),
                 data = subset(data, Age <=50),
                 index = c("Firm_id", "Year"),
                 effect = "twoways",
                 model = "within",
                 na.action = na.exclude)


# Extended model
  # 1 year lag
lag1_yext <- plm(formula = Firm_value ~  lag(Green_inn, 1) + Location + Age +
                   Sales_growth + Leverage + Year + ROA + Size,
                 data = subset(data, Age <=50),
                 index = c("Firm_id", "Year"),
                 effect = "twoways",
                 model = "within",
                 na.action = na.exclude)
  
  # 2 years lag
lag2_yext <- plm(formula = Firm_value ~ lag(Green_inn, 2) + Location + Age +
                   Sales_growth + Leverage + Year + ROA + Size,
                 data = subset(data, Age <=50),
                 index = c("Firm_id", "Year"),
                 effect = "twoways",
                 model = "within",
                 na.action = na.exclude)

# Print results using robust standard errors
coeftest(lag1_ybase, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                   Estimate Std. Error t value Pr(>|t|)   
## lag(Green_inn, 1)   4.7720     1.7981  2.6539 0.008233 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lag1_yext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                      Estimate  Std. Error  t value Pr(>|t|)    
## lag(Green_inn, 1)  1.9434e+00  1.2089e+00   1.6075  0.10864    
## Sales_growth      -4.9431e-05  9.6608e-05  -0.5117  0.60913    
## Leverage          -9.3802e-01  9.3431e-02 -10.0396  < 2e-16 ***
## ROA                6.9314e-02  3.4935e-02   1.9841  0.04784 *  
## Size               3.9888e+00  2.1177e+00   1.8835  0.06027 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lag2_ybase, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                   Estimate Std. Error t value Pr(>|t|)  
## lag(Green_inn, 2)   4.9344     1.9776  2.4951  0.01298 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lag2_yext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                      Estimate  Std. Error  t value Pr(>|t|)    
## lag(Green_inn, 2)  2.2639e+00  1.4616e+00   1.5489  0.12218    
## Sales_growth      -4.5082e-05  9.6676e-05  -0.4663  0.64124    
## Leverage          -9.3212e-01  9.1019e-02 -10.2409  < 2e-16 ***
## ROA                6.0757e-02  3.5066e-02   1.7327  0.08391 .  
## Size               4.2437e+00  2.3016e+00   1.8438  0.06594 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Old companies

  • In both 1-year or 2-year lag equations, the coefficients of GI are insignificant at all levels, meaning that GI do not contribute to the future value of older businesses.

  • Again, other financial factors tend to better explain the success of firms in this group, as mentioned in Section IV. 4.3.1. Short-run effects.

# Baseline
  # 1 year lag
lag1_obase <- plm(formula = Firm_value ~  lag(Green_inn, 1),
                 data = old_group,
                 index = c("Firm_id", "Year"),
                 effect = "twoways",
                 model = "within",
                 na.action = na.exclude)

  # 2 years lag
lag2_obase <- plm(formula = Firm_value ~ lag(Green_inn, 2),
                 data = old_group,
                 index = c("Firm_id", "Year"),
                 effect = "twoways",
                 model = "within",
                 na.action = na.exclude)


# Extended model
  # 1 year lag
lag1_oext <- plm(formula = Firm_value ~ lag(Green_inn, 1) + Location + Age +
                   Sales_growth + Leverage + Year + ROA + Size,
                 data = old_group,
                 index = c("Firm_id", "Year"),
                 effect = "twoways",
                 model = "within",
                 na.action = na.exclude)
  
  # 2 years lag
lag2_oext <- plm(formula = Firm_value ~ lag(Green_inn, 2) + Location + Age +
                   Sales_growth + Leverage + Year + ROA + Size,
                 data = old_group,
                 index = c("Firm_id", "Year"),
                 effect = "twoways",
                 model = "within",
                 na.action = na.exclude)

# Print results using robust standard errors
coeftest(lag1_obase, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                   Estimate Std. Error t value Pr(>|t|)
## lag(Green_inn, 1)   1.8153     2.3061  0.7872   0.4351
coeftest(lag1_oext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                    Estimate Std. Error  t value  Pr(>|t|)    
## lag(Green_inn, 1)  0.294170   2.878900   0.1022  0.919088    
## Sales_growth      -0.032177   0.059958  -0.5367  0.594266    
## Leverage          -0.879040   0.052948 -16.6019 < 2.2e-16 ***
## ROA                0.166858   0.059037   2.8263  0.007116 ** 
## Size               8.317248   4.087663   2.0347  0.048075 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lag2_obase, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                   Estimate Std. Error t value Pr(>|t|)
## lag(Green_inn, 2)  0.51153    1.33769  0.3824    0.704
coeftest(lag2_oext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                    Estimate Std. Error  t value  Pr(>|t|)    
## lag(Green_inn, 2)  1.529684   2.670418   0.5728 0.5700507    
## Sales_growth      -0.116577   0.057262  -2.0358 0.0485991 *  
## Leverage          -0.878394   0.041120 -21.3620 < 2.2e-16 ***
## ROA                0.175896   0.049011   3.5889 0.0009153 ***
## Size               8.335318   3.660175   2.2773 0.0283324 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

2. Leading years

  • Baseline models of lags and leads produce the same inferences. However, extended models using leads show opposite results to those using lags.

Young companies

  • GI estimates remain positive and significant at 0.01% (extended models) and 1% (baseline models). For example, \(lead1\_ybase\) and \(lead1\_yext\) estimates are \(4.7720\) (\(s.e. = 1.7981\)) and \(3.6078e+00\) (\(s.e. = 1.6558e+00\)), which are comparable to those produced with \(lead2\_ybase\) and \(lead2\_yext\), \(4.9344\) (\(s.e. = 1.9776\)) and \(4.9849e+00\) (\(s.e. = 1.9582e+00\)) respectively.

  • Looking at the largest effect, on average, per \(1\)% leverage in the eco-conscious innovation, business values of a young company can increase by approximately \(4.98\)%, ceteris paribus.

\(\implies\) The change in the performance of young biotechnology companies can be mainly attributed to sustainable technological development. This inference is supportive of the 2nd hypothesis concerning the long-run influence of GI.

# Baseline model
  # 1 year leading 
lead1_ybase <- plm(lead(Firm_value, 1) ~ Green_inn,
                   data = young_group,
                   index = c("Firm_id", "Year"),
                   effect = "twoways", model = "within",
                   na.action = na.exclude)

  # 2 years leading 
lead2_ybase <- plm(lead(Firm_value, 2) ~ Green_inn,
                   data = young_group, 
                   index = c("Firm_id", "Year"),
                   effect = "twoways", model = "within",
                   na.action = na.exclude)

# Extended model
  # 1 year leading 
lead1_yext <- plm(lead(Firm_value, 1) ~ Green_inn + Location + Age +
                    Sales_growth + Leverage + Year + ROA + Size,
                   data = young_group, 
                   index = c("Firm_id", "Year"),
                   effect = "twoways", model = "within",
                   na.action = na.exclude)

  # 2 years leading 
lead2_yext <- plm(lead(Firm_value, 2) ~ Green_inn + Location + Age +
                   Sales_growth + Leverage + Year + ROA + Size,
                   data = young_group, 
                   index = c("Firm_id", "Year"),
                   effect = "twoways", model = "within",
                   na.action = na.exclude)

# Print results using robust standard errors
coeftest(lead1_ybase, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##           Estimate Std. Error t value Pr(>|t|)   
## Green_inn   4.7720     1.7981  2.6539 0.008233 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lead1_yext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                 Estimate  Std. Error t value  Pr(>|t|)    
## Green_inn     3.6078e+00  1.6558e+00  2.1789   0.02985 *  
## Sales_growth  3.1724e-05  4.0622e-05  0.7810   0.43524    
## Leverage     -4.9766e-01  1.1861e-01 -4.1956 3.274e-05 ***
## ROA           5.8446e-02  4.0450e-02  1.4449   0.14918    
## Size         -1.1890e+00  1.5850e+00 -0.7502   0.45354    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lead2_ybase, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##           Estimate Std. Error t value Pr(>|t|)  
## Green_inn   4.9344     1.9776  2.4951  0.01298 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lead2_yext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                 Estimate  Std. Error t value  Pr(>|t|)    
## Green_inn     4.9849e+00  1.9582e+00  2.5457 0.0112728 *  
## Sales_growth  8.4492e-06  3.9450e-05  0.2142 0.8305170    
## Leverage     -3.5673e-01  9.8457e-02 -3.6232 0.0003277 ***
## ROA           1.5088e-02  4.4681e-02  0.3377 0.7357843    
## Size         -2.9497e+00  1.5305e+00 -1.9272 0.0546473 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Old companies

  • Again, in no cases GI supplements or destroys value of longer-standing corporations.
# Baseline model
  # 1 year leading 
lead1_obase <- plm(lead(Firm_value, 1) ~ Green_inn,
                   data = old_group,
                   index = c("Firm_id", "Year"),
                   effect = "twoways", model = "within",
                   na.action = na.exclude)

  # 2 years leading 
lead2_obase <- plm(lead(Firm_value, 2) ~ Green_inn,
                   data = old_group, 
                   index = c("Firm_id", "Year"),
                   effect = "twoways", model = "within",
                   na.action = na.exclude)

# Extended model
  # 1 year leading 
lead1_oext <- plm(lead(Firm_value, 1) ~ Green_inn + Location + Age +
                   Sales_growth + Leverage + Year + ROA + Size,
                   data = old_group, 
                   index = c("Firm_id", "Year"),
                   effect = "twoways", model = "within",
                   na.action = na.exclude)

  # 2 years leading 
lead2_oext <- plm(lead(Firm_value, 2) ~ Green_inn + Location + Age +
                   Sales_growth + Leverage + Year + ROA + Size,
                   data = old_group, 
                   index = c("Firm_id", "Year"),
                   effect = "twoways", model = "within",
                   na.action = na.exclude)

# Print results using robust standard errors
coeftest(lead1_obase, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##           Estimate Std. Error t value Pr(>|t|)
## Green_inn   1.8153     2.3061  0.7872   0.4351
coeftest(lead2_obase, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##           Estimate Std. Error t value Pr(>|t|)
## Green_inn  0.51153    1.33769  0.3824    0.704
coeftest(lead1_oext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##               Estimate Std. Error t value  Pr(>|t|)    
## Green_inn    -0.687542   4.311279 -0.1595    0.8740    
## Sales_growth  0.083220   0.120849  0.6886    0.4948    
## Leverage     -0.400656   0.078082 -5.1312 6.579e-06 ***
## ROA          -0.081417   0.102451 -0.7947    0.4312    
## Size         -5.897963   9.873463 -0.5974    0.5534    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lead2_oext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                 Estimate  Std. Error t value Pr(>|t|)  
## Green_inn     -2.4855757   2.9359834 -0.8466  0.40239  
## Sales_growth   0.1489306   0.1071246  1.3903  0.17234  
## Leverage       0.0099195   0.0737554  0.1345  0.89371  
## ROA           -0.1976160   0.1021143 -1.9352  0.06024 .
## Size         -17.1883402  14.4269038 -1.1914  0.24069  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Sub-conclusions 2:

  • Outcomes are inconsistent between models of lag and leads on the young sample. Extended equations with lags report some insignificant but positive estimates of GI. In contrast, estimates of GI are consistently positive and significant across all equations with leads (1).

  • The impact of GI is more visible for younger businesses. GI can be constructive to this group in the long run.

  • In either type of equation, GI has no effect on older companies. However, it does not generate negative outcomes as expected in the first hypothesis (2). Rather, other variables, namely \(Leverage\) and \(Size\) contribute much to this group’s success.

\(\implies\) As results (1) and (2) are also found in regressions using full sample, it can be confirmed that GI does not have an adverse impact on biotechnology enterprises’ value in short or long term.


Robustness Checks

Data skewness

Section III. Data Descriptives demonstrates that the number of patents obtained by research companies varies dramatically, ranging from 0 to as high as 5,616 patents. I suspect that such a difference can cause bias that cannot be perfectly handled by log transformation. Therefore, I will remove all the observations with over 4,000 patents, then run the regressions using Fixed Effects estimator. The data set without outliers is named cleaned data.

  • Removing abnormalities slightly enhances the distribution of variable.

  • Estimates and standard errors produced with original data and cleaned data are relatively comparable. Hence, similar interpretations are obtained.

\(\implies\) Log transformation can efficiently control for data skewness in the patent count variable and thus results are robust.


Abnormalities removal

# Create a new dataset containing observations with at most 4,000 patents
cleaned_data <- data %>% filter(Count <= 4000) %>% mutate(Green_inn = log(1 + Count))

# Statistics of the new data set
summary(cleaned_data)
##    Firm_id              Firm                Year        Firm_value    
##  Length:627         Length:627         Min.   :2006   Min.   :-5.909  
##  Class :character   Class :character   1st Qu.:2009   1st Qu.:52.786  
##  Mode  :character   Mode  :character   Median :2012   Median :70.098  
##                                        Mean   :2012   Mean   :65.622  
##                                        3rd Qu.:2016   3rd Qu.:83.711  
##                                        Max.   :2019   Max.   :99.149  
##      Count          Green_inn       Location              Age        
##  Min.   :   0.0   Min.   :0.000   Length:627         Min.   :  0.00  
##  1st Qu.:  13.0   1st Qu.:2.639   Class :character   1st Qu.: 12.00  
##  Median :  38.0   Median :3.664   Mode  :character   Median : 17.00  
##  Mean   : 406.2   Mean   :4.040                      Mean   : 24.65  
##  3rd Qu.: 218.0   3rd Qu.:5.389                      3rd Qu.: 22.00  
##  Max.   :3994.0   Max.   :8.293                      Max.   :123.00  
##      Sales           Sales_growth       Total_assets        Total_debt      
##  Min.   :     0.0   Min.   : -100.00   Min.   :    14.9   Min.   :     0.0  
##  1st Qu.:   218.2   1st Qu.:  -17.72   1st Qu.:  2053.2   1st Qu.:     0.0  
##  Median :  2693.0   Median :    7.86   Median : 29236.2   Median :    82.5  
##  Mean   : 11681.3   Mean   :  357.60   Mean   : 48625.1   Mean   :  5757.0  
##  3rd Qu.: 15824.5   3rd Qu.:   42.39   3rd Qu.: 61058.5   3rd Qu.:  4191.1  
##  Max.   :194033.2   Max.   :94150.00   Max.   :625686.0   Max.   :183618.0  
##     Leverage      Tshare_equity           ROA                ROE          
##  Min.   : 0.000   Min.   :  -657.8   Min.   :-142.569   Min.   :-415.395  
##  1st Qu.: 0.000   1st Qu.:  1050.5   1st Qu.: -33.466   1st Qu.: -49.292  
##  Median : 2.195   Median : 15018.7   Median : -14.277   Median : -20.401  
##  Mean   : 9.916   Mean   : 31752.7   Mean   : -17.785   Mean   : -27.421  
##  3rd Qu.:15.002   3rd Qu.: 38613.9   3rd Qu.:   3.387   3rd Qu.:   5.898  
##  Max.   :77.582   Max.   :470117.0   Max.   :  71.200   Max.   : 290.151  
##       Size       
##  Min.   : 2.703  
##  1st Qu.: 7.627  
##  Median :10.283  
##  Mean   : 9.350  
##  3rd Qu.:11.020  
##  Max.   :13.347

Variable distribution

  • The figures compare how \(Green\_ innovation\) variable is distributed in cleaned data and original data. They show a slight improvement in its distribution.
# Visualization of patent count
par(mfrow = c(1, 2))
boxplot(cleaned_data$Green_inn)
boxplot(data$Green_inn)


Short-run effects regression

# Baseline model
robust_base <- plm(Firm_value ~ Green_inn,
                   data = cleaned_data,
                   index = c("Firm_id", "Year"),
                   effect = "twoways", model = "within",
                   na.action = na.exclude)

# Extended model
robust_ext <- plm(Firm_value ~ Green_inn + Location + Age +
                   Sales_growth + Leverage + Year + ROA + Size,
                  data = cleaned_data,
                  index = c("Firm_id", "Year"),
                  effect = "twoways", model = "within",
                  na.action = na.exclude)

Long-run effects regression

# Lagged years
  # lag 1 year
lag1_rob_base <- plm(Firm_value ~ lag(Green_inn, 1),
                   data = cleaned_data,
                   index = c("Firm_id", "Year"),
                   effect = "twoways", model = "within",
                   na.action = na.exclude)

lag1_rob_ext <- plm(Firm_value ~ lag(Green_inn, 1) + Location + Age +
                   Sales_growth + Leverage + Year + ROA + Size,
                  data = cleaned_data,
                  index = c("Firm_id", "Year"),
                  effect = "twoways", model = "within",
                  na.action = na.exclude)
  # lag 2 years
lag2_rob_base <- plm(Firm_value ~ lag(Green_inn, 1),
                   data = cleaned_data,
                   index = c("Firm_id", "Year"),
                   effect = "twoways", model = "within",
                   na.action = na.exclude)

lag2_rob_ext <- plm(Firm_value ~ lag(Green_inn, 2) + Location + Age +
                   Sales_growth + Leverage + Year + ROA + Size,
                  data = cleaned_data,
                  index = c("Firm_id", "Year"),
                  effect = "twoways", model = "within",
                  na.action = na.exclude)

# Leading years
  # lead 1 year
lead1_rob_base <- plm(lead(Firm_value, 1) ~ Green_inn,
                   data = cleaned_data,
                   index = c("Firm_id", "Year"),
                   effect = "twoways", model = "within",
                   na.action = na.exclude)

lead1_rob_ext <- plm(lead(Firm_value, 1) ~ Green_inn + Location + Age +
                   Sales_growth + Leverage + Year + ROA + Size,
                   data = cleaned_data, 
                   index = c("Firm_id", "Year"),
                   effect = "twoways", model = "within",
                   na.action = na.exclude)

  # lead 2 year
lead2_rob_base <- plm(lead(Firm_value, 2) ~ Green_inn,
                   data = cleaned_data,
                   index = c("Firm_id", "Year"),
                   effect = "twoways", model = "within",
                   na.action = na.exclude)

lead2_rob_ext <- plm(lead(Firm_value, 2) ~ Green_inn + Location + Age +
                   Sales_growth + Leverage + Year + ROA + Size,
                   data = cleaned_data, 
                   index = c("Firm_id", "Year"),
                   effect = "twoways", model = "within",
                   na.action = na.exclude)

Results comparison

# Short-run effects
coeftest(fix_base, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##           Estimate Std. Error t value Pr(>|t|)   
## Green_inn   5.6195     1.7137  3.2791 0.001104 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(robust_base, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##           Estimate Std. Error t value  Pr(>|t|)    
## Green_inn   5.6601     1.7079   3.314 0.0009788 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(fix_ext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                 Estimate  Std. Error  t value  Pr(>|t|)    
## Green_inn     1.1263e+00  1.3012e+00   0.8655 0.3870968    
## Sales_growth -4.4859e-05  8.6038e-05  -0.5214 0.6022972    
## Leverage     -9.5759e-01  7.2035e-02 -13.2934 < 2.2e-16 ***
## ROA           3.3095e-02  2.9773e-02   1.1116 0.2667789    
## Size          6.0125e+00  1.6474e+00   3.6497 0.0002863 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(robust_ext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                 Estimate  Std. Error  t value  Pr(>|t|)    
## Green_inn     1.0997e+00  1.2966e+00   0.8481    0.3967    
## Sales_growth -4.5730e-05  8.6516e-05  -0.5286    0.5973    
## Leverage     -9.5065e-01  7.7618e-02 -12.2478 < 2.2e-16 ***
## ROA           2.6614e-02  2.9051e-02   0.9161    0.3600    
## Size          6.4859e+00  1.6240e+00   3.9938 7.374e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Long-run effects
  # lagged values
coeftest(lag1_base, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                   Estimate Std. Error t value Pr(>|t|)   
## lag(Green_inn, 1)   4.6127     1.7279  2.6695 0.007834 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lag1_rob_base, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                   Estimate Std. Error t value Pr(>|t|)   
## lag(Green_inn, 1)   4.5738     1.6980  2.6937 0.007305 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lag2_base, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                   Estimate Std. Error t value Pr(>|t|)  
## lag(Green_inn, 2)   4.7086     1.9057  2.4708  0.01384 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lag2_rob_base, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                   Estimate Std. Error t value Pr(>|t|)   
## lag(Green_inn, 1)   4.5738     1.6980  2.6937 0.007305 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lag1_ext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                      Estimate  Std. Error  t value Pr(>|t|)    
## lag(Green_inn, 1)  1.8744e+00  1.1942e+00   1.5696  0.11713    
## Sales_growth      -5.1034e-05  9.7160e-05  -0.5253  0.59963    
## Leverage          -9.2992e-01  8.5828e-02 -10.8347  < 2e-16 ***
## ROA                7.0163e-02  3.4531e-02   2.0319  0.04268 *  
## Size               4.1242e+00  2.0953e+00   1.9683  0.04957 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lag1_rob_ext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                      Estimate  Std. Error  t value Pr(>|t|)    
## lag(Green_inn, 1)  1.8427e+00  1.1914e+00   1.5466  0.12259    
## Sales_growth      -5.1376e-05  9.6745e-05  -0.5310  0.59563    
## Leverage          -9.2505e-01  9.2243e-02 -10.0284  < 2e-16 ***
## ROA                6.2330e-02  3.4526e-02   1.8053  0.07163 .  
## Size               4.5454e+00  2.1806e+00   2.0844  0.03763 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lag2_ext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                      Estimate  Std. Error  t value Pr(>|t|)    
## lag(Green_inn, 2)  2.2302e+00  1.4241e+00   1.5661  0.11801    
## Sales_growth      -4.6299e-05  9.7073e-05  -0.4769  0.63362    
## Leverage          -9.2321e-01  8.4005e-02 -10.9899  < 2e-16 ***
## ROA                6.1532e-02  3.4658e-02   1.7754  0.07649 .  
## Size               4.3855e+00  2.2872e+00   1.9174  0.05580 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lag2_rob_ext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                      Estimate  Std. Error  t value Pr(>|t|)    
## lag(Green_inn, 2)  2.2775e+00  1.4298e+00   1.5929   0.1119    
## Sales_growth      -4.3769e-05  9.6046e-05  -0.4557   0.6488    
## Leverage          -9.2031e-01  8.9205e-02 -10.3169   <2e-16 ***
## ROA                5.1561e-02  3.4538e-02   1.4929   0.1362    
## Size               4.8242e+00  2.4065e+00   2.0047   0.0456 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  # leading values
coeftest(lead1_base, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##           Estimate Std. Error t value Pr(>|t|)   
## Green_inn   4.6127     1.7279  2.6695 0.007834 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lead1_rob_base, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##           Estimate Std. Error t value Pr(>|t|)   
## Green_inn   4.5738     1.6980  2.6937 0.007305 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lead2_base, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##           Estimate Std. Error t value Pr(>|t|)  
## Green_inn   4.7086     1.9057  2.4708  0.01384 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lead2_rob_base, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##           Estimate Std. Error t value Pr(>|t|)  
## Green_inn   4.8249     1.8903  2.5525  0.01103 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lead1_ext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                 Estimate  Std. Error t value  Pr(>|t|)    
## Green_inn     3.4333e+00  1.5799e+00  2.1731   0.03023 *  
## Sales_growth  2.8677e-05  3.8012e-05  0.7544   0.45094    
## Leverage     -4.8823e-01  1.0873e-01 -4.4904 8.786e-06 ***
## ROA           5.9007e-02  3.9735e-02  1.4850   0.13815    
## Size         -1.1442e+00  1.5433e+00 -0.7414   0.45881    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lead1_rob_ext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                 Estimate  Std. Error t value  Pr(>|t|)    
## Green_inn     3.3677e+00  1.5551e+00  2.1655   0.03082 *  
## Sales_growth  2.7593e-05  3.8023e-05  0.7257   0.46836    
## Leverage     -4.9149e-01  1.1540e-01 -4.2590 2.457e-05 ***
## ROA           5.0880e-02  3.9246e-02  1.2965   0.19542    
## Size         -9.6922e-01  1.5547e+00 -0.6234   0.53329    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lead2_ext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                 Estimate  Std. Error t value  Pr(>|t|)    
## Green_inn     4.7633e+00  1.8656e+00  2.5533 0.0109911 *  
## Sales_growth  8.0165e-06  3.6644e-05  0.2188 0.8269287    
## Leverage     -3.2287e-01  9.2043e-02 -3.5078 0.0004959 ***
## ROA           1.4619e-02  4.3599e-02  0.3353 0.7375535    
## Size         -2.9217e+00  1.4892e+00 -1.9620 0.0503618 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(lead2_rob_ext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                 Estimate  Std. Error t value  Pr(>|t|)    
## Green_inn     4.7954e+00  1.8367e+00  2.6108 0.0093360 ** 
## Sales_growth  3.2177e-05  3.6303e-05  0.8863 0.3759187    
## Leverage     -3.5672e-01  9.4537e-02 -3.7734 0.0001828 ***
## ROA           2.1199e-02  4.4821e-02  0.4730 0.6364574    
## Size         -3.0095e+00  1.5358e+00 -1.9595 0.0506721 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Estimator Effectiveness

This test determines how reliable the Fixed Effects Regression results are in relative to Random Effects Regression. This is done by, firstly, comparing the coefficients and robust standard errors produced with the two models, and secondly, applying the Hausman test.

The Wallace-Hussain estimator is adopted instead of the Swamy-Arora RE model (default) to perform the Two-ways Random Effects Regressions. This is because the second method uses ‘within variance’, which is not applicable or computable given the dataset. Conversely, the first method is more flexible and does not rely on the within variance.

Random Effects Regressions explores how GI influences firm’s value in the same year using full sample and samples for two age groups of firms. Again, robust standard errors are reported.


Random Effects Models

  • Except for models on the old companies, namely \(rand\_obase\), \(rand\_oext\), all other models produce negative, but insignificant, estimates of GI.

  • Most surprisingly, in the baseline model of the old sample, estimate of GI is significantly positive (\(2.4572\), \(s.e. = 0.1385\)), implying that old bio-based corporations can benefit from eco-conscious technological development. Specifically, \(1\)% increase in GI’s level can leverage the financial performance of a long-standing biotech firm by approximately \(2.45\)%. This is opposite to the findings by Fixed Effects Regressions.

  • Turning to the extended models, no estimates are statistically significant, meaning that GI does not impact firms’ value, regardless of their age group.

# Full sample
  # baseline
rand_base <- plm(formula = Firm_value ~ Green_inn, 
               data = data, 
               index = c("Firm_id", "Year"), 
               model = "random",
               effect = "twoways", 
               random.method = "walhus")


  # extended
rand_ext <- plm(formula = Firm_value ~ Green_inn + Location + Age +
                 Sales_growth + Leverage + Year + ROA + Size,
               data = data, 
               index = c("Firm_id", "Year"), 
               model = "random",
               effect = "twoways", 
               random.method = "walhus")


# Young sample
  # baseline
rand_ybase <- plm(formula = Firm_value ~ Green_inn, 
               data = young_group, 
               index = c("Firm_id", "Year"), 
               model = "random",
               effect = "twoways", 
               random.method = "walhus")
  # extended
rand_yext <- plm(formula = Firm_value ~ Green_inn + Location + Age +
                 Sales_growth + Leverage + Year + ROA + Size,
               data = young_group, 
               index = c("Firm_id", "Year"), 
               model = "random",
               effect = "twoways", 
               random.method = "walhus" )

# Old sample
  # baseline
rand_obase <- plm(formula = Firm_value ~ Green_inn, 
               data = old_group, 
               index = c("Firm_id", "Year"), 
               model = "random",
               effect = "twoways", 
               random.method = "walhus")
  # extended
rand_oext <- plm(formula = Firm_value ~ Green_inn + Location + Age +
                 Sales_growth + Leverage + Year + ROA + Size,
               data = old_group, 
               index = c("Firm_id", "Year"), 
               model = "random",
               effect = "twoways", 
               random.method = "walhus" )


# Print results using robust standard errors
  # baseline
coeftest(rand_base, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  70.4051     4.1303 17.0460   <2e-16 ***
## Green_inn    -1.2731     1.0440 -1.2194   0.2231    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(rand_ybase, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  71.8964     4.2955 16.7375   <2e-16 ***
## Green_inn    -1.4199     1.2099 -1.1736   0.2411    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(rand_obase, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)  41.7265     5.1731  8.0661 1.662e-11 ***
## Green_inn     2.4572     0.1385 17.7414 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  # extended
coeftest(rand_ext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                 Estimate  Std. Error t value  Pr(>|t|)    
## (Intercept)   6.2884e+01  1.0490e+01  5.9947 3.472e-09 ***
## Green_inn    -6.9006e-01  7.8760e-01 -0.8762  0.381288    
## LocationCH   -1.5374e+01  3.4314e+00 -4.4803 8.886e-06 ***
## LocationDE   -5.1082e+00  3.3563e+00 -1.5220  0.128528    
## LocationDK   -6.6718e+00  5.2339e+00 -1.2747  0.202886    
## LocationFI    4.3782e+00  7.2863e+00  0.6009  0.548141    
## LocationFR   -1.9451e+01  4.7690e+00 -4.0787 5.124e-05 ***
## LocationGB   -1.2205e+01  4.3122e+00 -2.8302  0.004803 ** 
## LocationIT    1.2847e+00  6.1899e+00  0.2075  0.835652    
## LocationNL   -6.1205e+00  6.2599e+00 -0.9777  0.328590    
## LocationNO   -3.3736e+00  3.6888e+00 -0.9146  0.360776    
## LocationSE   -1.2105e+01  4.6544e+00 -2.6008  0.009525 ** 
## Age          -6.2866e-02  7.7222e-02 -0.8141  0.415905    
## Sales_growth -2.7499e-05  9.4297e-05 -0.2916  0.770674    
## Leverage     -9.8099e-01  1.0450e-01 -9.3871 < 2.2e-16 ***
## Year2007      2.1936e+00  2.8660e+00  0.7654  0.444344    
## Year2008      2.5817e+00  2.8108e+00  0.9185  0.358733    
## Year2009      4.2494e-01  3.1574e+00  0.1346  0.892984    
## Year2010     -1.5009e+00  3.3286e+00 -0.4509  0.652212    
## Year2011     -1.9027e+00  3.7341e+00 -0.5095  0.610549    
## Year2012     -4.2387e+00  3.5106e+00 -1.2074  0.227748    
## Year2013     -5.4516e+00  3.2317e+00 -1.6869  0.092130 .  
## Year2014     -1.9889e+00  3.0140e+00 -0.6599  0.509559    
## Year2015     -1.1071e+00  3.4346e+00 -0.3223  0.747303    
## Year2016     -8.9261e-01  3.5742e+00 -0.2497  0.802873    
## Year2017      4.5458e-01  3.4404e+00  0.1321  0.894924    
## Year2018     -3.2467e-01  3.4753e+00 -0.0934  0.925598    
## Year2019     -2.6330e+00  3.7444e+00 -0.7032  0.482204    
## ROA           5.0161e-02  4.7971e-02  1.0457  0.296131    
## Size          3.1512e+00  1.0593e+00  2.9747  0.003047 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(rand_yext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                 Estimate  Std. Error t value  Pr(>|t|)    
## (Intercept)   5.7034e+01  1.2524e+01  4.5539 6.494e-06 ***
## Green_inn    -1.0636e+00  8.9543e-01 -1.1878  0.235443    
## LocationCH   -9.8247e+00  4.5052e+00 -2.1807  0.029628 *  
## LocationDE   -2.7303e+00  4.4047e+00 -0.6199  0.535597    
## LocationDK   -5.4236e+00  6.3377e+00 -0.8558  0.392499    
## LocationFR   -1.7148e+01  5.1900e+00 -3.3042  0.001015 ** 
## LocationGB   -8.9852e+00  4.7726e+00 -1.8827  0.060275 .  
## LocationNL   -3.4210e+00  7.5467e+00 -0.4533  0.650511    
## LocationNO   -1.2102e+00  4.3663e+00 -0.2772  0.781759    
## LocationSE   -9.9905e+00  4.9440e+00 -2.0207  0.043792 *  
## Age           3.0949e-01  2.9341e-01  1.0548  0.291972    
## Sales_growth -2.9307e-05  9.5701e-05 -0.3062  0.759539    
## Leverage     -9.8861e-01  1.0495e-01 -9.4194 < 2.2e-16 ***
## Year2007      2.0861e+00  3.3266e+00  0.6271  0.530862    
## Year2008      2.1933e+00  3.2708e+00  0.6706  0.502769    
## Year2009     -3.2269e-01  3.6694e+00 -0.0879  0.929957    
## Year2010     -2.7327e+00  3.9271e+00 -0.6959  0.486810    
## Year2011     -3.4294e+00  4.4635e+00 -0.7683  0.442628    
## Year2012     -6.5238e+00  4.3432e+00 -1.5021  0.133656    
## Year2013     -8.1369e+00  4.1113e+00 -1.9791  0.048302 *  
## Year2014     -4.4646e+00  4.2134e+00 -1.0596  0.289789    
## Year2015     -3.6580e+00  4.6906e+00 -0.7799  0.435810    
## Year2016     -3.5885e+00  5.2633e+00 -0.6818  0.495660    
## Year2017     -2.8204e+00  5.2069e+00 -0.5417  0.588263    
## Year2018     -4.0069e+00  5.3154e+00 -0.7538  0.451278    
## Year2019     -6.7086e+00  5.8115e+00 -1.1544  0.248853    
## ROA           4.8140e-02  4.5464e-02  1.0589  0.290134    
## Size          3.2060e+00  1.1138e+00  2.8783  0.004154 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(rand_oext, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                 Estimate  Std. Error  t value  Pr(>|t|)    
## (Intercept)  333.4238061  71.0459116   4.6931 2.355e-05 ***
## Green_inn      0.4025810   3.2629650   0.1234 0.9023332    
## LocationDK   -27.2558815   7.4045262  -3.6810 0.0005979 ***
## LocationFI   -16.9055801   2.4928177  -6.7817 1.753e-08 ***
## LocationIT   -47.5541979  18.5400341  -2.5649 0.0135721 *  
## Age           -3.4677146   0.7560741  -4.5865 3.353e-05 ***
## Sales_growth  -0.0066424   0.0433399  -0.1533 0.8788479    
## Leverage      -0.8852449   0.0731369 -12.1039 4.767e-16 ***
## Year2007       3.1755341   2.4927430   1.2739 0.2089584    
## Year2008       5.9472080   3.1555380   1.8847 0.0656639 .  
## Year2009       6.0952033   4.3431558   1.4034 0.1670689    
## Year2010       8.7403159   5.3167465   1.6439 0.1068660    
## Year2011      10.6056016   6.0911972   1.7411 0.0882027 .  
## Year2012      14.6875574   6.6378018   2.2127 0.0318112 *  
## Year2013      16.8720520   7.4722894   2.2579 0.0286331 *  
## Year2014      18.6807389   8.5917220   2.1743 0.0347533 *  
## Year2015      20.6375336   9.4298611   2.1885 0.0336352 *  
## Year2016      22.5179046  10.3358555   2.1786 0.0344089 *  
## Year2017      28.8242838   9.4938856   3.0361 0.0039010 ** 
## Year2018      31.1998105   9.5216418   3.2767 0.0019782 ** 
## Year2019      31.6032512  10.8543125   2.9116 0.0054839 ** 
## ROA            0.1702112   0.0865301   1.9671 0.0550934 .  
## Size          10.0239107   3.6867477   2.7189 0.0091471 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Hausman Test

The Hausman Test checks if the Fixed Effects Regression should be preferred to the Random Effects.

  • Null hypothesis \(H_0\): The preferred model is Random Effects;

  • Alternate hypothesis \(H_1\): The preferred model is Fixed Effects.

Decision rule: Reject \(H_0\) if p < (less than 0.05) in favor of \(H_1\).

Results:

  • Both baseline and extended equations estimated with the Fixed Effects method are preferred.

\(\implies\) It is reasonable to reject they null hypothesis that Random Effects Regression works better. This supports this analysis’s method and previously discussed findings.

# Compare baseline models in different samples
phtest(rand_base, fix_base)
## 
##  Hausman Test
## 
## data:  Firm_value ~ Green_inn
## chisq = 26.53, df = 1, p-value = 2.594e-07
## alternative hypothesis: one model is inconsistent
phtest(rand_ybase, young_base)
## 
##  Hausman Test
## 
## data:  Firm_value ~ Green_inn
## chisq = 24.812, df = 1, p-value = 6.32e-07
## alternative hypothesis: one model is inconsistent
phtest(rand_obase, old_base)
## 
##  Hausman Test
## 
## data:  Firm_value ~ Green_inn
## chisq = 0.0070949, df = 1, p-value = 0.9329
## alternative hypothesis: one model is inconsistent
# Compare extended models in different samples
phtest(fix_ext, rand_ext)
## 
##  Hausman Test
## 
## data:  Firm_value ~ Green_inn + Location + Age + Sales_growth + Leverage +  ...
## chisq = 17.253, df = 5, p-value = 0.004044
## alternative hypothesis: one model is inconsistent
phtest(young_ext, rand_yext)
## 
##  Hausman Test
## 
## data:  Firm_value ~ Green_inn + Location + Age + Sales_growth + Leverage +  ...
## chisq = 15.932, df = 5, p-value = 0.00704
## alternative hypothesis: one model is inconsistent
phtest(old_ext, rand_oext)
## 
##  Hausman Test
## 
## data:  Firm_value ~ Green_inn + Location + Age + Sales_growth + Leverage +  ...
## chisq = 1.0244e-13, df = 5, p-value = 1
## alternative hypothesis: one model is inconsistent

Sub-conclusions 3:

  • The Random Effects Estimator yields opposite results to the Fixed Effects Regression. Fixed Effects confirms that in the short-run, old companies are unaffected by GI, whereas the Random Effects method suggests a positive infuennce of GI on financial performances of long-established companies.

  • However, the Hausman test proves that the Fixed Effects estimator is more appropriate, it should be adopted as the key methodology for this analysis. Therefore, only findings by the Fixed Effects estimator are considered.


Section V: Results

This section creates tables that summarize all the regression results. These tables will be presented in the main word file. Key findings are provided in the next section, some of which will be highlighted and further interpreted in the text document.

Pooled OLS Regression

Model summary

modelsummary(list("Basline" = lm_base, 
                  "Extended" = lm_ext1, 
                  "Extended - ROE" = lm_ext2, 
                  "Extended - ROA" = lm_ext3),
             output = "kableExtra")
Basline Extended Extended - ROE Extended - ROA
(Intercept) 77.674 52.658 6.084 24.302
(1.840) (296.810) (301.864) (299.037)
Green_inn -3.036 -1.348 -1.347 -1.416
(0.391) (0.307) (0.312) (0.309)
LocationCH -11.927 -12.212 -12.399
(3.467) (3.527) (3.491)
LocationDE -4.586 -4.683 -4.470
(3.493) (3.554) (3.520)
LocationDK -7.168 -7.855 -7.725
(3.655) (3.716) (3.680)
LocationFI 4.608 3.358 3.229
(5.627) (5.720) (5.656)
LocationFR -15.603 -16.495 -16.075
(3.442) (3.498) (3.467)
LocationGB -12.355 -11.612 -11.867
(3.367) (3.423) (3.390)
LocationIT -1.485 -1.853 -2.369
(5.301) (5.394) (5.336)
LocationNL -9.303 -11.946 -10.610
(4.639) (4.688) (4.659)
LocationNO 2.130 1.260 1.093
(5.027) (5.112) (5.057)
LocationSE -7.358 -7.825 -7.138
(3.713) (3.777) (3.742)
Age -0.068 -0.053 -0.069
(0.029) (0.030) (0.030)
Sales_growth 0.000 0.000 0.000
(0.000) (0.000) (0.000)
Leverage -0.981 -1.033 -1.016
(0.042) (0.042) (0.041)
Year 0.015 0.038 0.030
(0.148) (0.150) (0.149)
ROA -0.123 0.022
(0.037) (0.022)
ROE 0.084 0.037
(0.017) (0.010)
Size 0.920 1.064 1.015
(0.308) (0.312) (0.309)
Num.Obs. 646 646 646 646
R2 0.086 0.622 0.608 0.616
R2 Adj. 0.084 0.611 0.598 0.605
AIC 5817.6 5280.5 5302.1 5289.8
BIC 5831.0 5369.9 5387.0 5374.7
Log.Lik. -2905.800 -2620.266 -2632.030 -2625.878
F 60.381 57.389 57.364 59.177
RMSE 21.77 14.18 14.43 14.30

Translate into table

# Create a list containing all Pooled OLS models
pooled_models <- c("Firm_value ~ Green_inn",
                   "Firm_value ~ Green_inn + Location + Age +
                   Sales_growth + Leverage + Year + ROA + ROE + Size",
                   "Firm_value ~ Green_inn + Location + Age +
                   Sales_growth +  Leverage + Year + ROA + Size",
                   "Firm_value ~ Green_inn + Location + Age +
                   Sales_growth + Leverage + Year + ROE + Size")

# Create a dataframe containing estimates and standard errors
table5 <- data.frame(NULL)

# For-loop to add values to the table
for (i in 1:4) {
  model <- lm(
    formula = as.formula(paste(pooled_models[i])),
    data = data)
  
  # extract coefficients
  y1 <- coef(summary(model))[2, 1] %>% round(3)
  # extract standard errors
  y2 <- coef(summary(model))[2, 2] %>% round(3)
  # number of observations
  obs <- format(round(nobs(model), 0))
  # significance levels
  sig <- c("***")
  
  # assigning values to the table
  table5[i, 1] <- y1         
  table5[i, 2] <- y2         
  table5[5, 1:2] <- obs
  table5[i, 3] <- sig
  table5[5, 3] <- ""
}

# Format table
  # rename rows and columns
colnames(table5) <- c("<b>Coefficients<b>", "<b>Standard errors<b>", "<b>Significance<b>")
rownames(table5) <- c("GI",
                   "GI + Location + Age +
                   Sales_growth + Leverage + Year + ROA + ROE + Size",
                   "GI + Location + Age +
                   Sales_growth +  Leverage + Year + ROA + Size",
                   "GI + Location + Age +
                   Sales_growth + Leverage + Year + ROE + Size", 
                   "<b>Observations<b>")

  # styling table
table5 <- kable(
  x = table5,
  format = "html",
  size = 10, escape = FALSE,
  caption = "<b>TABLE 5: <i>Pooled OLS Regression",
  align = "c") %>%
  kable_classic(full_width = F, html_font = "calibri", position = "left") %>%
  
  # notify variable columns
  pack_rows("Model specifications", 1, 4, bold = TRUE) %>%
  
  # footnotes for table 
  footnote(general = "The table reports the coefficients and stardard errors estimated with the Pooled OLS using the full sample. A variable is considered significant if the ratio between its coefficient and its standard error is equal to or greater than 2.",
  footnote_as_chunk = TRUE)
  
  # save table
  save_kable(x = table5, file = "Table 5.png", zoom = 1.5)
table5
TABLE 5: Pooled OLS Regression
Coefficients Standard errors Significance
Model specifications
GI -3.036 0.391 ***
GI + Location + Age + Sales_growth + Leverage + Year + ROA + ROE + Size -1.348 0.307 ***
GI + Location + Age + Sales_growth + Leverage + Year + ROA + Size -1.347 0.312 ***
GI + Location + Age + Sales_growth + Leverage + Year + ROE + Size -1.416 0.309 ***
Observations 646 646
Note: The table reports the coefficients and stardard errors estimated with the Pooled OLS using the full sample. A variable is considered significant if the ratio between its coefficient and its standard error is equal to or greater than 2.

Fixed Effects Regression

Short-run

Create a table

# Create a list containing all Fixed Effects regression models
short_run_models <- c("Firm_value ~ Green_inn", 
                      "Firm_value ~ Green_inn + Location + Age + Sales_growth +
                       Leverage + Year + ROA + Size")

# Create a data frame containing estimates and standard errors
table6 <- data.frame(NULL)

Full sample

# For-loop to add values to the table
for (i in 1:2) {
  
  model <- plm(
    formula = as.formula(paste(short_run_models[[i]])),
    data = data, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  
  # assigning values to the table
  table6[i, 1] <- y[1] %>% round(3)     
  table6[i, 2] <- y[1, 2] %>% round(5)     
}

Young sample

# For-loop to add values to the table
for (i in 1:2) {
  
  model <- plm(
    formula = as.formula(paste(short_run_models[[i]])),
    data = young_group, index = c("Firm_id", "Year"),  
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  
  # assigning values to the table
  table6[i, 3] <- y[1] %>% round(3)     
  table6[i, 4] <- y[1, 2] %>% round(5)     
}

Old sample

# For-loop to add values to the table
for (i in 1:2) {
  
  model <- plm(
    formula = as.formula(paste(short_run_models[[i]])),
    data = old_group, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  
  # assigning values to the table
  table6[i, 5] <- y[1] %>% round(3)     
  table6[i, 6] <- y[1, 2] %>% round(5)      
}

Translate into table

# Format table
  # for loops to place robust standard errors below the corresponding coefficients
for (i in 1:4) {
  for (k in seq(1, 6, 2)) {
    
  table6[i, k] <- paste(
    table6[i, k], "<br>(",
    table6[i, k +1], ")",
    sep = "")
  }
}

  # generate line breaks in the cell between the coefficient and standard error
table6[, 1] <- linebreak(table6[, 1])
table6[, 3] <- linebreak(table6[, 3])
table6[, 5] <- linebreak(table6[, 5])

  # remove columns displaying robust standard errors
table6 <- table6 %>% select(-2, -4, -6) 

  # removing any values from the cells that do not have any values
table6 <- table6[1:2, ]

  # add new columns reporting number of observations
table6[3, ] <- c(nrow(data), nrow(young_group), nrow(old_group))

  # naming the rows
rownames(table6) <- c("GI", "GI + Controls", "<b>Observations<b>")
colnames(table6) <- c("<b>Full sample<b>", "<b>Young<b>", "<b>Old<b>")

  # styling table
table6 <- kable(
  x = table6,
  format = "html",
  size = 10, escape = FALSE,
  caption = "<b>TABLE 6: <i> Short-run Effects of Green Innovation",
  align = "c") %>%
  kable_classic(full_width = F, html_font = "calibri", position = "left") %>%

  # notify variable columns
  # add_header_above(c( "Full sample" = 1, "Young" = 1, "Old" = 1), bold = TRUE) %>%

  # notify variable rows
  pack_rows("Baseline", 1, 1, bold = TRUE) %>%
  pack_rows("Extended", 2, 2, bold = TRUE) %>%
  
  # footnotes for table 
  footnote(general = "The table reports the Fixed Effects estimates in the same year of GI investment decisions. Robust standard errors are reported in parentheses. A variable is considered significant if the ratio between its coefficient and its standard error is equal to or greater than 2.",
  footnote_as_chunk = TRUE)
    
  # save table
save_kable(x = table6, file = "Table 6.png", zoom = 1.5)

# print table
table6
TABLE 6: Short-run Effects of Green Innovation
Full sample Young Old
Baseline
GI 5.619
(1.71371)
5.685
(1.78421)
2.107
(2.68575)
Extended
GI + Controls 1.126
(1.30123)
1.124
(1.33059)
0.403
(2.77463)
Observations 646 576 70
Note: The table reports the Fixed Effects estimates in the same year of GI investment decisions. Robust standard errors are reported in parentheses. A variable is considered significant if the ratio between its coefficient and its standard error is equal to or greater than 2.

Long-run

Create a table

# Create a list of long-run effect models
base_models <- c("Firm_value ~ lag(Green_inn, 1)",
                 "Firm_value ~ lag(Green_inn, 2)",
                 "lead(Firm_value, 1) ~ Green_inn",
                 "lead(Firm_value, 2) ~ Green_inn",
        
                 "Firm_value ~ lag(Green_inn, 1)",
                 "Firm_value ~ lag(Green_inn, 2)",
                 "lead(Firm_value, 1) ~ Green_inn",
                 "lead(Firm_value, 2) ~ Green_inn",
        
                 "Firm_value ~ lag(Green_inn, 1)",
                 "Firm_value ~ lag(Green_inn, 2)",
                 "lead(Firm_value, 1) ~ Green_inn",
                 "lead(Firm_value, 2) ~ Green_inn")
        
ext_models <- c("Firm_value ~ lag(Green_inn, 1) + Location + Age + Year + 
                Sales_growth + Leverage + ROA + Size",
                "Firm_value ~ lag(Green_inn, 2) + Location + Age + Year +
                Sales_growth + Leverage + ROA + Size",
                "lead(Firm_value, 1) ~ Green_inn + Location + Age + Year +
                Sales_growth + Leverage + ROA + Size",
                "lead(Firm_value, 2) ~ Green_inn + Location + Age + Year +
                Sales_growth + Leverage + ROA + Size",
                
                "Firm_value ~ lag(Green_inn, 1) + Location + Age + Year + 
                Sales_growth + Leverage + ROA + Size",
                "Firm_value ~ lag(Green_inn, 2) + Location + Age + Year +
                Sales_growth + Leverage + ROA + Size",
                "lead(Firm_value, 1) ~ Green_inn + Location + Age + Year +
                Sales_growth + Leverage + ROA + Size",
                "lead(Firm_value, 2) ~ Green_inn + Location + Age + Year +
                Sales_growth + Leverage + ROA + Size",
                
                "Firm_value ~ lag(Green_inn, 1) + Location + Age + Year + 
                Sales_growth + Leverage + ROA + Size",
                "Firm_value ~ lag(Green_inn, 2) + Location + Age + Year +
                Sales_growth + Leverage + ROA + Size",
                "lead(Firm_value, 1) ~ Green_inn + Location + Age + Year +
                Sales_growth + Leverage + ROA + Size",
                "lead(Firm_value, 2) ~ Green_inn + Location + Age + Year +
                Sales_growth + Leverage + ROA + Size")

# Create a data frame to contain the results
table7 <- data.frame(NULL)

Baseline models

# For-loop to add values to the table
  # Full sample
for (i in 1:4) {
  
  model <- plm(
    formula = as.formula(paste(base_models[[i]])),
    data = data, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  
  # assigning values to the table
  table7[1, i] <- y[1] %>% round(3)     
  table7[2, i] <- y[2] %>% round(4)     
}

  # Young sample
for (i in 5:8) {
  
  model <- plm(
    formula = as.formula(paste(base_models[[i]])),
    data = young_group, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  
  # assigning values to the table
  table7[1, i] <- y[1] %>% round(3)     # coef
  table7[2, i] <- y[2] %>% round(4)     # std
}

  # Old sample
for (i in 9:12) {
  
  model <- plm(
    formula = as.formula(paste(base_models[[i]])),
    data = old_group, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors 
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  
  # assigning values to the table
  table7[1, i] <- y[1] %>% round(3)     # coef
  table7[2, i] <- y[2] %>% round(4)     # std
}

Extended models

# For-loop to add values to the table
  # Full sample
for (i in 1:4) {
  
  model <- plm(
    formula = as.formula(paste(ext_models[[i]])),
    data = data, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors & number of observations
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  obs <- nobs(model)
  
  # assigning values to the table
  table7[3, i] <- y[1] %>% round(3)     
  table7[4, i] <- y[2] %>% round(5)  
  table7[5, i] <- obs
}

  # Young sample
for (i in 5:8) {
  
  model <- plm(
    formula = as.formula(paste(ext_models[[i]])),
    data = young_group, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  obs <- nobs(model)
  
  # assigning values to the table
  table7[3, i] <- y[1] %>% round(3)     # coef
  table7[4, i] <- y[2] %>% round(5)     # std
  table7[5, i] <- obs
}

  # Old sample
for (i in 9:12) {
  
  model <- plm(
    formula = as.formula(paste(ext_models[[i]])),
    data = old_group, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors & number of observations
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  obs <- nobs(model)
  
  # assigning values to the table
  table7[3, i] <- y[1] %>% round(3)     
  table7[4, i] <- y[2] %>% round(5)     
  table7[5, i] <- obs
}

Translate into table

# Format table
  # for loops to place robust standard errors below the corresponding coefficients
for (i in 1:12) {
  
  table7[1, i] <- paste(
    table7[1, i], "<br>(",
    table7[2, i], ")",
    sep="")
  
  table7[3, i] <- paste(
    table7[3, i], "<br>(",
    table7[4, i], ")",
    sep="")
}

  # remove duplicate rows displaying robust standard errors
table7 <- table7[c(1, 3, 5), ]

  # naming rows & columns
rownames(table7) <- c("GI", "GI + Controls", "<b>Observations<b>")
colnames(table7) <- rep(c("(1)", "(2)"), times = 6)

  # styling table
table7 <- kable(
  x = table7,
  format = "html",
  size = 10, escape = FALSE,
  caption = "<b>TABLE 7: <i>Long-run effects of Green Innovation",
  align = "c") %>%
  kable_classic(full_width = F, html_font = "calibri", position = "left") %>%

  # notify variable columns
  add_header_above(c("", "Lag" = 2, "Lead" = 2, "Lag" = 2, 
                     "Lead" = 2, "Lag" = 2, "Lead" = 2), bold = TRUE) %>%
  add_header_above(c("", "Full sample" = 4, "Young" = 4, "Old" = 4), bold = TRUE) %>%

  # notify variable rows
  pack_rows("Baseline", 1, 1, bold = TRUE) %>%
  pack_rows("Extended", 2, 2, bold = TRUE) %>%
  
  # footnotes for table 
  footnote(general = "The table reports the Fixed Effects estimates using (1) or (2) years of lagged or leading values of variables of interest. Robust standard errors are reported in parentheses. A variable is considered significant if the ratio between its coefficient and its standard error is equal to or greater than 2.",
  footnote_as_chunk = TRUE)
    
  # save table
save_kable(x = table7, file = "Table 7.png", zoom = 1.5)

# print table
table7
TABLE 7: Long-run effects of Green Innovation
Full sample
Young
Old
Lag
Lead
Lag
Lead
Lag
Lead
Baseline
GI 4.613
(1.7279)
4.709
(1.9057)
4.613
(1.7279)
4.709
(1.9057)
4.772
(1.7981)
4.934
(1.9776)
4.772
(1.7981)
4.934
(1.9776)
1.815
(2.3061)
0.512
(1.3377)
1.815
(2.3061)
0.512
(1.3377)
Extended
GI + Controls 1.874
(-5e-05)
2.23
(-5e-05)
3.433
(3e-05)
4.763
(1e-05)
1.943
(-5e-05)
2.264
(-5e-05)
3.608
(3e-05)
4.985
(1e-05)
0.294
(-0.03218)
1.53
(-0.11658)
-0.688
(0.08322)
-2.486
(0.14893)
Observations 582 530 582 530 517 470 517 470 65 60 65 60
Note: The table reports the Fixed Effects estimates using (1) or (2) years of lagged or leading values of variables of interest. Robust standard errors are reported in parentheses. A variable is considered significant if the ratio between its coefficient and its standard error is equal to or greater than 2.

Robustness Checks

Data skewness

Create a table

# Create a data frame containing estimates and standard errors
table8 <- data.frame(NULL)

Short-run effects

# For-loop to add values to the table
  # original data
for (i in 1) {
  
  model <- plm(
    formula = "Firm_value ~ Green_inn",
    data = data, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  obs <- nobs(model)
  
  # assigning values to the table
  table8[i, 1] <- y[1] %>% round(3)  
  table8[i, 2] <- y[2] %>% round(3) 
  table8[3, 1] <- obs
}

  # cleaned data
for (i in 1) {
  
  model <- plm(
    formula = "Firm_value ~ Green_inn",
    data = cleaned_data, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  
  # assigning values to the table
  table8[i, 3] <- y[1] %>% round(3)    
  table8[i, 4] <- y[2] %>% round(3)
  table8[3, 3] <- obs
}

Long run effects

# Create lists of models estimating long-run effects of GII
  # models with lags
lag_base <- c("Firm_value ~ lag(Green_inn, 1)",
                 "Firm_value ~ lag(Green_inn, 2)")
lag_ext <- c("Firm_value ~ lag(Green_inn, 1) + Location + Age + Year + 
             Sales_growth + Leverage + ROA + Size",
             "Firm_value ~ lag(Green_inn, 2) + Location + Age + Year +
             Sales_growth + Leverage + ROA + Size")

  # models with leads
lead_base <- c("lead(Firm_value, 1) ~ Green_inn",
                 "lead(Firm_value, 2) ~ Green_inn")
lead_ext <- c("lead(Firm_value, 1) ~ Green_inn + Location + Age + Year +
                Sales_growth + Leverage + ROA + Size",
                "lead(Firm_value, 2) ~ Green_inn + Location + Age + Year +
                Sales_growth + Leverage + ROA + Size")

i. Models with lags

# For-loop to add values to the table
# Baseline
  ## (1) original data
for (i in 1:2) {
  
  model <- plm(
    formula = as.formula(paste(lag_base[[i]])),
    data = data, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors & number of observations
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  
  # assigning values to the table
  table8[i, 5] <- y[1] %>% round(3)     
  table8[i, 6] <- y[2] %>% round(3)
  table8[3, 5] <- obs
}

  ## (2) cleaned data
for (i in 1:2) {
  
  model <- plm(
    formula = as.formula(paste(lag_base[[i]])),
    data = cleaned_data, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  
  # assigning values to the table
  table8[i, 7] <- y[1] %>% round(3)    
  table8[i, 8] <- y[2] %>% round(5)   
  table8[3, 7] <- obs
}

  # Extended models
  ## (1) original data
for (i in 1:2) {
  
  model <- plm(
    formula = as.formula(paste(lag_ext[[i]])),
    data = data, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors & number of observations
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  
  # assigning values to the table
  table8[i, 9] <- y[1] %>% round(3)     
  table8[i, 10] <- y[2] %>% round(5) 
  table8[3, 9] <- obs
}

  ## (2) cleaned data
for (i in 1:2) {
  
  model <- plm(
    formula = as.formula(paste(lag_ext[[i]])),
    data = cleaned_data, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  
  # assigning values to the table
  table8[i, 11] <- y[1] %>% round(3)    
  table8[i, 12] <- y[2] %>% round(5) 
  table8[3, 11] <- obs
}

ii. Models with leads

# For-loop to add values to the table
# Baseline
  ## (1) original data
for (i in 1:2) {
  
  model <- plm(
    formula = as.formula(paste(lead_base[[i]])),
    data = data, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors & number of observations
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  
  # assigning values to the table
  table8[i, 13] <- y[1] %>% round(3)     
  table8[i, 14] <- y[2] %>% round(3) 
  table8[3, 13] <- obs
}

  ## (2) cleaned data
for (i in 1:2) {
  
  model <- plm(
    formula = as.formula(paste(lead_base[[i]])),
    data = cleaned_data, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  
  # assigning values to the table
  table8[i, 15] <- y[1] %>% round(3)    
  table8[i, 16] <- y[2] %>% round(5)  
  table8[3, 15] <- obs
}

# Extended models
  ## (1) original data
for (i in 1:2) {
  
  model <- plm(
    formula = as.formula(paste(lead_ext[[i]])),
    data = data, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors & number of observations
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  
  # assigning values to the table
  table8[i, 17] <- y[1] %>% round(3)     
  table8[i, 18] <- y[2] %>% round(5)  
  table8[3, 17] <- obs
}

  ## (2) cleaned data
for (i in 1:2) {
  
  model <- plm(
    formula = as.formula(paste(lead_ext[[i]])),
    data = cleaned_data, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  
  # assigning values to the table
  table8[i, 19] <- y[1] %>% round(3)    
  table8[i, 20] <- y[2] %>% round(5) 
  table8[3, 19] <- obs
}

Translate into table

# Format table

  # for loops to place robust standard errors below the corresponding coefficients
for (i in 1:2) {
  for (k in seq(1, 20, 2)) {
    
  table8[i, k] <- paste(
    table8[i, k], "<br>(",
    table8[i, k + 1], ")",
    sep="")
  }
}

  # remove duplicate rows displaying robust standard errors
table8 <- table8[-seq(2, 20, 2)]

  # remove NA values
table8[2, 1:2] <- " "
 
  # name the rows
colnames(table8) <- rep(c("Original", "Cleaned"), times = 5)
rownames(table8) <- c("GI", "GI + Controls", "<b>Observations<b>")

  # styling table
table8 <- kable(
  x = table8,
  format = "html",
  size = 10, escape = FALSE,
  caption = "<b>TABLE 8: <i>Robustness check - Data skewness & Log transformation",
  align = "c") %>%
  kable_classic(full_width = F, html_font = "calibri", position = "left") %>%

  # notify variable columns
  add_header_above(c("", "", "", "(1)" = 2, "(2)" = 2, "(1)" = 2, "(2)" = 2), 
                   bold = TRUE) %>%
  add_header_above(c("", "", "", "Lag" = 4,  "Lead" = 4), bold = TRUE) %>%
  add_header_above(c("", "Short-run" = 2, "Long-run" = 8), 
                   bold = TRUE) %>%

  # notify variable rows
  pack_rows("Baseline", 1, 1, bold = TRUE) %>%
  pack_rows("Extended", 2, 2, bold = TRUE) %>%
  
  # footnotes for table 
  footnote(general = "The table reports the estimates and robust standard errors using Original data and Cleaned data. The Cleaned data is the Original data excluding observations with over 4000 patent counts. A variable is considered significant if the ratio between its coefficient and its standard error is equal to or greater than 2.",
  footnote_as_chunk = TRUE)
    
  # save table
save_kable(x = table8, file = "Table 8.png", zoom = 1.5)

# print table
table8
TABLE 8: Robustness check - Data skewness & Log transformation
Short-run
Long-run
Lag
Lead
(1)
(2)
(1)
(2)
Original Cleaned Original Cleaned Original Cleaned Original Cleaned Original Cleaned
Baseline
GI 5.619
(1.714)
5.66
(1.708)
4.613
(1.728)
4.574
(1.69798)
1.874
(-5e-05)
1.843
(-5e-05)
4.613
(1.728)
4.574
(1.69798)
3.433
(3e-05)
3.368
(3e-05)
Extended
GI + Controls 4.709
(1.906)
4.825
(1.89027)
2.23
(-5e-05)
2.278
(-4e-05)
4.709
(1.906)
4.825
(1.89027)
4.763
(1e-05)
4.795
(3e-05)
Observations 646 646 646 646 646 646 646 646 646 646
Note: The table reports the estimates and robust standard errors using Original data and Cleaned data. The Cleaned data is the Original data excluding observations with over 4000 patent counts. A variable is considered significant if the ratio between its coefficient and its standard error is equal to or greater than 2.

Random & Fixed Effects

Create a table

# Create a list of models
base_model <- c("Firm_value ~ Green_inn")
        
ext_model <- c("Firm_value ~ Green_inn + Location + Age + Year + 
                Sales_growth + Leverage + ROA + Size")

# Create a data frame to contain the results
table9 <- data.frame(NULL)

Baseline models

# For-loop to add values to the table
  # Full sample
  ## (1) fixed
for (i in 1) {
  
  model <- plm(
    formula = as.formula(paste(base_model)),
    data = data, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors 
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  
  # assigning values to the table
  table9[i, 1] <- y[1] %>% round(3)  
  table9[i, 2] <- y[2] %>% round(3)  
}

  ## (2) random 
for (i in 1) {
  
  model <- plm(
    formula = as.formula(paste(base_model)),
    data = data, index = c("Firm_id", "Year"), 
    model = "random", effect = "twoways", 
    random.method = "walhus")
  
  # robust standard errors
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  
  # assigning values to the table
  table9[i, 3] <- y[2] %>% round(3)    
  table9[i, 4] <- y[2, 2] %>% round(3) 
}

  # Young sample
  ## (1) fixed
for (i in 1) {
  
  model <- plm(
    formula = as.formula(paste(base_model)),
    data = young_group, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors & number of observations
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  
  # assigning values to the table
  table9[i, 5] <- y[1] %>% round(3)  
  table9[i, 6] <- y[2] %>% round(3)    
}

  ## (2) random 
for (i in 1) {
  
  model <- plm(
    formula =as.formula(paste(base_model)),
    data = young_group, index = c("Firm_id", "Year"), 
    model = "random", effect = "twoways", 
    random.method = "walhus")
  
  # robust standard errors
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  
  # assigning values to the table
  table9[i, 7] <- y[2] %>% round(3)    
  table9[i, 8] <- y[2, 2] %>% round(3) 
}

  # Old sample
  ## (1) fixed
for (i in 1) {
  
  model <- plm(
    formula = as.formula(paste(base_model)),
    data = old_group, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors & number of observations
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")

  # assigning values to the table
  table9[i, 9] <- y[1] %>% round(3)  
  table9[i, 10] <- y[2] %>% round(3) 
}

  ## (2) random 
for (i in 1) {
  
  model <- plm(
    formula = as.formula(paste(base_model)),
    data = old_group, index = c("Firm_id", "Year"), 
    model = "random", effect = "twoways", 
    random.method = "walhus")
  
  # robust standard errors
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  
  # assigning values to the table
  table9[i, 11] <- y[2] %>% round(3)    
  table9[i, 12] <- y[2, 2] %>% round(3)    
}

Extended models

# For-loop to add values to the table
  # Full sample
  ## (1) fixed
for (i in 2) {
  
  model <- plm(
    formula = as.formula(paste(ext_model)),
    data = data, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors & number of observations
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  obs <- nobs(model)
  
  # assigning values to the table
  table9[i, 1] <- y[1] %>% round(3)  
  table9[i, 2] <- y[2] %>% round(5) 
  table9[3, 1:2] <- obs
}

  ## (2) random 
for (i in 2) {
  
  model <- plm(
    formula = as.formula(paste(ext_model)),
    data = data, index = c("Firm_id", "Year"), 
    model = "random", effect = "twoways", 
    random.method = "walhus")
  
  # robust standard errors
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  
  # assigning values to the table
  table9[i, 3] <- y[2] %>% round(3)    
  table9[i, 4] <- y[2, 2] %>% round(3) 
  table9[3, 3:4] <- obs
}

  # Young sample
  ## (1) fixed
for (i in 2) {
  
  model <- plm(
    formula = as.formula(paste(ext_model)),
    data = young_group, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors & number of observations
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  obs <- nobs(model)
  
  # assigning values to the table
  table9[i, 5] <- y[1] %>% round(3)  
  table9[i, 6] <- y[2] %>% round(5)    
  table9[3, 5:6] <- obs
}

  ## (2) random 
for (i in 2) {
  
  model <- plm(
    formula = as.formula(paste(ext_model)),
    data = young_group, index = c("Firm_id", "Year"), 
    model = "random", effect = "twoways", 
    random.method = "walhus")
  
  # robust standard errors
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  
  # assigning values to the table
  table9[i, 7] <- y[2] %>% round(3)    
  table9[i, 8] <- y[2, 2] %>% round(3)   
  table9[3, 7:8] <- obs
}

  # Old sample
  ## (1) fixed
for (i in 2) {
  
  model <- plm(
    formula = as.formula(paste(ext_model)),
    data = old_group, index = c("Firm_id", "Year"), 
    model = "within", effect = "twoways", 
    na.action = na.exclude)
  
  # robust standard errors & number of observations
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  obs <- nobs(model)
  
  # assigning values to the table
  table9[i, 9] <- y[1] %>% round(3)  
  table9[i, 10] <- y[2] %>% round(3) 
  table9[3, 9:10] <- obs
}

  ## (2) random 
for (i in 2) {
  
  model <- plm(
    formula = as.formula(paste(ext_model)),
    data = old_group, index = c("Firm_id", "Year"), 
    model = "random", effect = "twoways", 
    random.method = "walhus")
  
  # robust standard errors
  y <- coeftest(model, vcov. = vcovHC, type = "HC1")
  
  # assigning values to the table
  table9[i, 11] <- y[2] %>% round(3)    
  table9[i, 12] <- y[2, 2] %>% round(3)    
  table9[3, 11:12] <- obs
}

Translate into table

# Format table
  # for loops to place robust standard errors below the corresponding coefficients
for (i in 1:2) {
  for (k in seq(1, 12, 2)) {
    
  table9[i, k] <- paste(
    table9[i, k], "<br>(",
    table9[i, k + 1], ")",
    sep="")
  }
}

  # remove duplicate rows displaying robust standard errors
table9 <- table9[-seq(2, 12, 2)]


  # naming rows & columns
colnames(table9) <- rep(c("Fixed", "Random"), times = 3)
rownames(table9) <- c("GI", "GI + Controls", "<b>Observations<b>")

  # styling table
table9 <- kable(
  x = table9,
  format = "html",
  size = 10, escape = FALSE,
  caption = "<b>TABLE 9: <i>Fixed Effects and Random Effects Comparison",
  align = "c") %>%
  kable_classic(full_width = F, html_font = "calibri", position = "left") %>%

  # notify variable columns
  add_header_above(c("", "Full sample" = 2, "Young" = 2, "Old" = 2), bold = TRUE) %>%

  # notify variable rows
  pack_rows("Baseline", 1, 1, bold = TRUE) %>%
  pack_rows("Extended", 2, 2, bold = TRUE) %>%
  
  # footnotes for table 
  footnote(general = "The table reports the Fixed Effects and Random Effects estimates. Robust standard errors are reported in parentheses. A variable is considered significant if the ratio between its coefficient and its standard error is equal to or greater than 2.",
  footnote_as_chunk = TRUE)
    
  # save table
save_kable(x = table9, file = "Table 9.png", zoom = 1.5)

# print table
table9
TABLE 9: Fixed Effects and Random Effects Comparison
Full sample
Young
Old
Fixed Random Fixed Random Fixed Random
Baseline
GI 5.619
(1.714)
-1.273
(1.044)
5.685
(1.784)
-1.42
(1.21)
2.107
(2.686)
2.457
(0.139)
Extended
GI + Controls 1.126
(-4e-05)
-0.69
(0.788)
1.124
(-4e-05)
-1.064
(0.895)
0.403
(-0.007)
0.403
(3.263)
Observations 646 646 576 576 70 70
Note: The table reports the Fixed Effects and Random Effects estimates. Robust standard errors are reported in parentheses. A variable is considered significant if the ratio between its coefficient and its standard error is equal to or greater than 2.

Section VI: Conclusions

Estimator comparison

  • Pooled OLS: The estimates of the dependent variable in the naive OLS regression of firm value on GI without any fixed effects are negative. However, the OLS model is very low fit and \(\bar{R}^{2}\) increases tremendously when control variables are added into the baseline model (\(\bar{R}^{2}_{baseline}\) = \(0.08572\), \(\bar{R}^{2}_{extended}\) = \(0.6083\)). This indeed confirms the existence of upward bias.

  • Random Effects: GI seems to not have any impact on the value of a biotech company. Only in the baseline model on the old sample that the estimate is significant positive. The Hausman test, however, is not in favor of this method. As a results, its results and inferences are considered less important than the Fixed Effects’.

  • Fixed Effects: Scientifically proved to be efficient in addressing unobervable heterogeneity, a type of OVB, Fixed Effects Model is appropriate to be employed in panel data. The outcomes obtained in robustness check are also supportive of this regressor, implying that this method should be preferred.

\(\implies\) Fixed Effects Regression is an appropriate technique and its results are robust.


Hypothesis summary

  • Hypothesis 1: Rejected. GI does not inversely impact current value of biotech companies.

  • Hypothesis 2: Partly supported. GI can eventually generate favorable outcomes to businesses. However, this is only applicable to younger firms.

  • Hypothesis 3: Supported. The effects of GI vary between firms of different age groups. Younger corporations are likely to benefit from GI whereas the older competitors experience no benefits.

\(\implies\) It is essential to consider which age group a firm is in when evaluating the potential contribution of GI to its financial performance.


KEY FINDINGS

  • The impact of green innovation (GI) is not instantaneously visible. It takes at least a year for the effects to show up, if there are any.

  • The estimates produced with the OLS regression tend to be upward biased, thus highly unreliable.

  • Compared to the naive Pooled OLS, Fixed Effects Regression works more efficiently in terms of addressing OVB. It helps get rid of biases caused by both observable and unobservable factors, thus guarantee causal inferences.

  • Lead and lag models produce different results. Regressions employing 1 and 2 year lags of GI demonstrate that the future value of a firm is independent of their today’s investment decisions, regardless of their age. Conversely, all equations with 1 and 2-year leads of firm value confirm the constructive role of GI. However, this is only applicable to younger enterprises.

  • Performance of longer-existing firms are unlikely to depend on their GI levels. Instead, other financial criteria, particularly leverage ratio and firm size, matter more to their long-term success.

  • Compared to the Random Effects, Fixed Effects are also more reliable.

\(\implies\) Estimates on variables of interest produced by the Two-way Fixed Effects Models evidence that eco-based technological development does not generate negative outcomes to biotechnology corporations. Under certain circumstances, it can even boost business performance.


References

Arnold, M. (2019, March 12). 10 Regression with Panel Data | Introduction to Econometrics with R. Econometrics-with-r.org. https://www.econometrics-with-r.org/10-rwpd.html

Library of Statistical Techniques (LOST). Fixed Effects in Linear Regression. https://lost-stats.github.io/Model_Estimation/OLS/fixed_effects_in_linear_regression.html#:~:text=Fixed%20effects%20is%20a%20statistical Schweinberger, Martin. (2022). Fixed- and Mixed-Effects Regression Models in R. Brisbane: The University of Queensland.

Additional Links https://slcladal.github.io/regression.html#Example_1:_Preposition_Use_across_Real-Time

https://cran.r-project.org/web/packages/sjPlot/vignettes/tab_model_estimates.html

https://clanfear.github.io/r_exposure_workshop/lectures/r3/r3_1/r_exposure_3_1_loops.html#42